diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2002-12-27 23:51:46 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2002-12-27 23:51:46 +0000 |
commit | 1457ab523343e94e094ad1c60de37077f8dc5589 (patch) | |
tree | 2df86f0b66c5df4c373dec5809a1f62c563df901 /libavcodec | |
parent | ac97734133a52c41825e427fd15a66f65a89d4bb (diff) | |
download | ffmpeg-1457ab523343e94e094ad1c60de37077f8dc5589.tar.gz |
qpel encoding
4mv+b frames encoding finally fixed
chroma motion estimation (ME)
5 comparison functions for ME
b frame encoding speedup
wmv2 codec (unfinished)
user specified diamond size for EPZS
Originally committed as revision 1365 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/allcodecs.c | 4 | ||||
-rw-r--r-- | libavcodec/avcodec.h | 39 | ||||
-rw-r--r-- | libavcodec/dsputil.c | 308 | ||||
-rw-r--r-- | libavcodec/dsputil.h | 35 | ||||
-rw-r--r-- | libavcodec/h263.c | 175 | ||||
-rw-r--r-- | libavcodec/h263dec.c | 67 | ||||
-rw-r--r-- | libavcodec/i386/dsputil_mmx.c | 161 | ||||
-rw-r--r-- | libavcodec/i386/motion_est_mmx.c | 19 | ||||
-rw-r--r-- | libavcodec/motion_est.c | 1149 | ||||
-rw-r--r-- | libavcodec/motion_est_template.c | 737 | ||||
-rw-r--r-- | libavcodec/mpeg12.c | 2 | ||||
-rw-r--r-- | libavcodec/mpegvideo.c | 188 | ||||
-rw-r--r-- | libavcodec/mpegvideo.h | 60 | ||||
-rw-r--r-- | libavcodec/msmpeg4.c | 55 | ||||
-rw-r--r-- | libavcodec/msmpeg4data.h | 241 | ||||
-rw-r--r-- | libavcodec/simple_idct.c | 90 | ||||
-rw-r--r-- | libavcodec/simple_idct.h | 3 | ||||
-rw-r--r-- | libavcodec/wmv2.c | 850 |
18 files changed, 3213 insertions, 970 deletions
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c index 286221dbca..a5d2e41cfb 100644 --- a/libavcodec/allcodecs.c +++ b/libavcodec/allcodecs.c @@ -53,7 +53,7 @@ void avcodec_register_all(void) register_avcodec(&msmpeg4v2_encoder); register_avcodec(&msmpeg4v3_encoder); register_avcodec(&wmv1_encoder); -// register_avcodec(&wmv2_encoder); + register_avcodec(&wmv2_encoder); register_avcodec(&huffyuv_encoder); #endif /* CONFIG_ENCODERS */ register_avcodec(&rawvideo_codec); @@ -66,7 +66,7 @@ void avcodec_register_all(void) register_avcodec(&msmpeg4v2_decoder); register_avcodec(&msmpeg4v3_decoder); register_avcodec(&wmv1_decoder); -// register_avcodec(&wmv2_decoder); + register_avcodec(&wmv2_decoder); register_avcodec(&mpeg_decoder); register_avcodec(&h263i_decoder); register_avcodec(&rv10_decoder); diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h index 606cfd814b..a2a1d3428e 100644 --- a/libavcodec/avcodec.h +++ b/libavcodec/avcodec.h @@ -5,8 +5,8 @@ #define LIBAVCODEC_VERSION_INT 0x000406 #define LIBAVCODEC_VERSION "0.4.6" -#define LIBAVCODEC_BUILD 4646 -#define LIBAVCODEC_BUILD_STR "4646" +#define LIBAVCODEC_BUILD 4647 +#define LIBAVCODEC_BUILD_STR "4647" enum CodecID { CODEC_ID_NONE, @@ -850,6 +850,41 @@ typedef struct AVCodecContext { * decoding: unused */ int mb_qmax; + + /** + * motion estimation compare function + * encoding: set by user. + * decoding: unused + */ + int me_cmp; + /** + * subpixel motion estimation compare function + * encoding: set by user. + * decoding: unused + */ + int me_sub_cmp; + /** + * macroblock compare function (not supported yet) + * encoding: set by user. + * decoding: unused + */ + int mb_cmp; +#define FF_CMP_SAD 0 +#define FF_CMP_SSE 1 +#define FF_CMP_SATD 2 +#define FF_CMP_DCT 3 +#define FF_CMP_PSNR 4 +#define FF_CMP_BIT 5 +#define FF_CMP_RD 6 +#define FF_CMP_ZERO 7 +#define FF_CMP_CHROMA 256 + + /** + * ME diamond size + * encoding: set by user. 
+ * decoding: unused + */ + int dia_size; } AVCodecContext; typedef struct AVCodec { diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c index 1e177116a4..c48c71119b 100644 --- a/libavcodec/dsputil.c +++ b/libavcodec/dsputil.c @@ -20,6 +20,7 @@ */ #include "avcodec.h" #include "dsputil.h" +#include "mpegvideo.h" int ff_bit_exact=0; @@ -144,7 +145,28 @@ static int pix_norm1_c(UINT8 * pix, int line_size) } -static int pix_norm_c(UINT8 * pix1, UINT8 * pix2, int line_size) +static int sse8_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size) +{ + int s, i; + UINT32 *sq = squareTbl + 256; + + s = 0; + for (i = 0; i < 8; i++) { + s += sq[pix1[0] - pix2[0]]; + s += sq[pix1[1] - pix2[1]]; + s += sq[pix1[2] - pix2[2]]; + s += sq[pix1[3] - pix2[3]]; + s += sq[pix1[4] - pix2[4]]; + s += sq[pix1[5] - pix2[5]]; + s += sq[pix1[6] - pix2[6]]; + s += sq[pix1[7] - pix2[7]]; + pix1 += line_size; + pix2 += line_size; + } + return s; +} + +static int sse16_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size) { int s, i, j; UINT32 *sq = squareTbl + 256; @@ -1141,7 +1163,103 @@ QPEL_MC(0, avg_ , _ , op_avg) #undef op_put #undef op_put_no_rnd -static int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) +static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){ + uint8_t *cm = cropTbl + MAX_NEG_CROP; + int i; + + for(i=0; i<h; i++){ + dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4]; + dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4]; + dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4]; + dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4]; + dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4]; + dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4]; + dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4]; + dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4]; + dst+=dstStride; + src+=srcStride; + } +} + +static void wmv2_mspel8_v_lowpass(uint8_t 
*dst, uint8_t *src, int dstStride, int srcStride, int w){ + uint8_t *cm = cropTbl + MAX_NEG_CROP; + int i; + + for(i=0; i<w; i++){ + const int src_1= src[ -srcStride]; + const int src0 = src[0 ]; + const int src1 = src[ srcStride]; + const int src2 = src[2*srcStride]; + const int src3 = src[3*srcStride]; + const int src4 = src[4*srcStride]; + const int src5 = src[5*srcStride]; + const int src6 = src[6*srcStride]; + const int src7 = src[7*srcStride]; + const int src8 = src[8*srcStride]; + const int src9 = src[9*srcStride]; + dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; + dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; + dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; + dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; + dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; + dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; + dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; + dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; + src++; + dst++; + } +} + +static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){ + put_pixels8_c(dst, src, stride, 8); +} + +static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){ + uint8_t half[64]; + wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); + put_pixels8_l2(dst, src, half, stride, stride, 8, 8); +} + +static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){ + wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8); +} + +static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){ + uint8_t half[64]; + wmv2_mspel8_h_lowpass(half, src, 8, stride, 8); + put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8); +} + +static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){ + wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8); +} + +static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){ + uint8_t halfH[88]; + uint8_t halfV[64]; + uint8_t 
halfHV[64]; + wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); + wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8); + wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); + put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8); +} +static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){ + uint8_t halfH[88]; + uint8_t halfV[64]; + uint8_t halfHV[64]; + wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); + wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8); + wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8); + put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8); +} +static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){ + uint8_t halfH[88]; + wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11); + wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8); +} + + +static inline int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size) { int s, i; @@ -1257,7 +1375,7 @@ static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) return s; } -static int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) +static inline int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size) { int s, i; @@ -1341,6 +1459,14 @@ static int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size) return s; } +static int sad16x16_c(void *s, uint8_t *a, uint8_t *b, int stride){ + return pix_abs16x16_c(a,b,stride); +} + +static int sad8x8_c(void *s, uint8_t *a, uint8_t *b, int stride){ + return pix_abs8x8_c(a,b,stride); +} + void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last) { int i; @@ -1399,6 +1525,156 @@ static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ dst[i+0] = src1[i+0]-src2[i+0]; } +#define BUTTERFLY2(o1,o2,i1,i2) \ +o1= (i1)+(i2);\ +o2= (i1)-(i2); + +#define BUTTERFLY1(x,y) \ +{\ + int a,b;\ + a= x;\ + b= y;\ + x= a+b;\ + y= a-b;\ +} + +#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y))) + +static int hadamard8_diff_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, 
int stride){ + int i; + int temp[64]; + int sum=0; + + for(i=0; i<8; i++){ + //FIXME try pointer walks + BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]); + BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]); + BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]); + BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]); + + BUTTERFLY1(temp[8*i+0], temp[8*i+2]); + BUTTERFLY1(temp[8*i+1], temp[8*i+3]); + BUTTERFLY1(temp[8*i+4], temp[8*i+6]); + BUTTERFLY1(temp[8*i+5], temp[8*i+7]); + + BUTTERFLY1(temp[8*i+0], temp[8*i+4]); + BUTTERFLY1(temp[8*i+1], temp[8*i+5]); + BUTTERFLY1(temp[8*i+2], temp[8*i+6]); + BUTTERFLY1(temp[8*i+3], temp[8*i+7]); + } + + for(i=0; i<8; i++){ + BUTTERFLY1(temp[8*0+i], temp[8*1+i]); + BUTTERFLY1(temp[8*2+i], temp[8*3+i]); + BUTTERFLY1(temp[8*4+i], temp[8*5+i]); + BUTTERFLY1(temp[8*6+i], temp[8*7+i]); + + BUTTERFLY1(temp[8*0+i], temp[8*2+i]); + BUTTERFLY1(temp[8*1+i], temp[8*3+i]); + BUTTERFLY1(temp[8*4+i], temp[8*6+i]); + BUTTERFLY1(temp[8*5+i], temp[8*7+i]); + + sum += + BUTTERFLYA(temp[8*0+i], temp[8*4+i]) + +BUTTERFLYA(temp[8*1+i], temp[8*5+i]) + +BUTTERFLYA(temp[8*2+i], temp[8*6+i]) + +BUTTERFLYA(temp[8*3+i], temp[8*7+i]); + } +#if 0 +static int maxi=0; +if(sum>maxi){ + maxi=sum; + printf("MAX:%d\n", maxi); +} +#endif + return sum; +} + +static int hadamard8_abs_c(uint8_t *src, int stride, int mean){ + int i; + int temp[64]; + int sum=0; +//FIXME OOOPS ignore 0 term instead of mean mess + for(i=0; i<8; i++){ + //FIXME try pointer walks + BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-mean,src[stride*i+1]-mean); + BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-mean,src[stride*i+3]-mean); + BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-mean,src[stride*i+5]-mean); + BUTTERFLY2(temp[8*i+6], temp[8*i+7], 
src[stride*i+6]-mean,src[stride*i+7]-mean); + + BUTTERFLY1(temp[8*i+0], temp[8*i+2]); + BUTTERFLY1(temp[8*i+1], temp[8*i+3]); + BUTTERFLY1(temp[8*i+4], temp[8*i+6]); + BUTTERFLY1(temp[8*i+5], temp[8*i+7]); + + BUTTERFLY1(temp[8*i+0], temp[8*i+4]); + BUTTERFLY1(temp[8*i+1], temp[8*i+5]); + BUTTERFLY1(temp[8*i+2], temp[8*i+6]); + BUTTERFLY1(temp[8*i+3], temp[8*i+7]); + } + + for(i=0; i<8; i++){ + BUTTERFLY1(temp[8*0+i], temp[8*1+i]); + BUTTERFLY1(temp[8*2+i], temp[8*3+i]); + BUTTERFLY1(temp[8*4+i], temp[8*5+i]); + BUTTERFLY1(temp[8*6+i], temp[8*7+i]); + + BUTTERFLY1(temp[8*0+i], temp[8*2+i]); + BUTTERFLY1(temp[8*1+i], temp[8*3+i]); + BUTTERFLY1(temp[8*4+i], temp[8*6+i]); + BUTTERFLY1(temp[8*5+i], temp[8*7+i]); + + sum += + BUTTERFLYA(temp[8*0+i], temp[8*4+i]) + +BUTTERFLYA(temp[8*1+i], temp[8*5+i]) + +BUTTERFLYA(temp[8*2+i], temp[8*6+i]) + +BUTTERFLYA(temp[8*3+i], temp[8*7+i]); + } + + return sum; +} + +static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ + MpegEncContext * const s= (MpegEncContext *)c; + DCTELEM temp[64]; + int sum=0, i; + + s->dsp.diff_pixels(temp, src1, src2, stride); + s->fdct(temp); + + for(i=0; i<64; i++) + sum+= ABS(temp[i]); + + return sum; +} + +void simple_idct(INT16 *block); //FIXME + +static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ + MpegEncContext * const s= (MpegEncContext *)c; + DCTELEM temp[64], bak[64]; + int sum=0, i; + + s->mb_intra=0; + + s->dsp.diff_pixels(temp, src1, src2, stride); + + memcpy(bak, temp, 64*sizeof(DCTELEM)); + + s->dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); + s->dct_unquantize(s, temp, 0, s->qscale); + simple_idct(temp); //FIXME + + for(i=0; i<64; i++) + sum+= (temp[i]-bak[i])*(temp[i]-bak[i]); + + return sum; +} + +WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c) +WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c) +WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c) + void dsputil_init(DSPContext* c, unsigned mask) { 
static int init_done = 0; @@ -1429,7 +1705,8 @@ void dsputil_init(DSPContext* c, unsigned mask) c->clear_blocks = clear_blocks_c; c->pix_sum = pix_sum_c; c->pix_norm1 = pix_norm1_c; - c->pix_norm = pix_norm_c; + c->sse[0]= sse16_c; + c->sse[1]= sse8_c; /* TODO [0] 16 [1] 8 */ c->pix_abs16x16 = pix_abs16x16_c; @@ -1489,6 +1766,28 @@ void dsputil_init(DSPContext* c, unsigned mask) /* dspfunc(avg_no_rnd_qpel, 1, 8); */ #undef dspfunc + c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c; + c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c; + c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c; + c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c; + c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c; + c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c; + c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c; + c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c; + + c->hadamard8_diff[0]= hadamard8_diff16_c; + c->hadamard8_diff[1]= hadamard8_diff_c; + c->hadamard8_abs = hadamard8_abs_c; + + c->dct_sad[0]= dct_sad16x16_c; + c->dct_sad[1]= dct_sad8x8_c; + + c->sad[0]= sad16x16_c; + c->sad[1]= sad8x8_c; + + c->quant_psnr[0]= quant_psnr16x16_c; + c->quant_psnr[1]= quant_psnr8x8_c; + c->add_bytes= add_bytes_c; c->diff_bytes= diff_bytes_c; @@ -1516,7 +1815,6 @@ void dsputil_init(DSPContext* c, unsigned mask) #ifdef HAVE_MMI dsputil_init_mmi(c, mask); #endif - } /* remove any non bit exact operation (testing purpose) */ diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h index 29aca1ac22..b2cac91c9c 100644 --- a/libavcodec/dsputil.h +++ b/libavcodec/dsputil.h @@ -79,13 +79,10 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ /* motion estimation */ -typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size); -/* -int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx); -int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx); -int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); -int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int 
lx); -*/ +typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/; + +typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/; + typedef struct DSPContext { /* pixel ops : interface with DCT */ void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size); @@ -98,7 +95,16 @@ typedef struct DSPContext { void (*clear_blocks)(DCTELEM *blocks/*align 16*/); int (*pix_sum)(UINT8 * pix, int line_size); int (*pix_norm1)(UINT8 * pix, int line_size); - int (*pix_norm)(UINT8 * pix1, UINT8 * pix2, int line_size); + me_cmp_func sad[2]; /* identical to pix_absAxA except additional void * */ + me_cmp_func sse[2]; + me_cmp_func hadamard8_diff[2]; + me_cmp_func dct_sad[2]; + me_cmp_func quant_psnr[2]; + int (*hadamard8_abs )(uint8_t *src, int stride, int mean); + + me_cmp_func me_cmp[11]; + me_cmp_func me_sub_cmp[11]; + me_cmp_func mb_cmp[11]; /* maybe create an array for 16/8 functions */ op_pixels_func put_pixels_tab[2][4]; @@ -109,6 +115,7 @@ typedef struct DSPContext { qpel_mc_func avg_qpel_pixels_tab[2][16]; qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; + qpel_mc_func put_mspel_pixels_tab[8]; op_pixels_abs_func pix_abs16x16; op_pixels_abs_func pix_abs16x16_x2; @@ -120,9 +127,8 @@ typedef struct DSPContext { op_pixels_abs_func pix_abs8x8_xy2; /* huffyuv specific */ - //FIXME note: alignment isnt guranteed currently but could be if needed void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); - void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/,int w); + void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w); } DSPContext; void dsputil_init(DSPContext* p, unsigned mask); @@ -156,6 +162,7 @@ static 
inline void emms(void) __asm __volatile ("emms;":::"memory"); } + #define emms_c() \ {\ if (mm_flags & MM_MMX)\ @@ -281,6 +288,14 @@ void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input, FFTSample *tmp); void ff_mdct_end(MDCTContext *s); +#define WARPER88_1616(name8, name16)\ +static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride){\ + return name8(s, dst , src , stride)\ + +name8(s, dst+8 , src+8 , stride)\ + +name8(s, dst +8*stride, src +8*stride, stride)\ + +name8(s, dst+8+8*stride, src+8+8*stride, stride);\ +} + #ifndef HAVE_LRINTF /* XXX: add ISOC specific test to avoid specific BSD testing. */ /* better than nothing implementation. */ diff --git a/libavcodec/h263.c b/libavcodec/h263.c index bbeea3abd4..239bba8bab 100644 --- a/libavcodec/h263.c +++ b/libavcodec/h263.c @@ -204,10 +204,6 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) put_bits(&s->pb,1,0); /* Reference Picture Resampling: off */ put_bits(&s->pb,1,0); /* Reduced-Resolution Update: off */ - if (s->pict_type == I_TYPE) - s->no_rounding = 0; - else - s->no_rounding ^= 1; put_bits(&s->pb,1,s->no_rounding); /* Rounding Type */ put_bits(&s->pb,2,0); /* Reserved */ put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */ @@ -392,6 +388,57 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){ } } +void ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){ + const int mb_index= s->mb_x + s->mb_y*s->mb_width; + int xy= s->block_index[0]; + uint16_t time_pp= s->pp_time; + uint16_t time_pb= s->pb_time; + int i; + + //FIXME avoid divides + switch(s->co_located_type_table[mb_index]){ + case 0: + s->mv_type= MV_TYPE_16X16; + s->mv[0][0][0] = s->motion_val[xy][0]*time_pb/time_pp + mx; + s->mv[0][0][1] = s->motion_val[xy][1]*time_pb/time_pp + my; + s->mv[1][0][0] = mx ? s->mv[0][0][0] - s->motion_val[xy][0] + : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp; + s->mv[1][0][1] = my ? 
s->mv[0][0][1] - s->motion_val[xy][1] + : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp; + break; + case CO_LOCATED_TYPE_4MV: + s->mv_type = MV_TYPE_8X8; + for(i=0; i<4; i++){ + xy= s->block_index[i]; + s->mv[0][i][0] = s->motion_val[xy][0]*time_pb/time_pp + mx; + s->mv[0][i][1] = s->motion_val[xy][1]*time_pb/time_pp + my; + s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->motion_val[xy][0] + : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp; + s->mv[1][i][1] = my ? s->mv[0][i][1] - s->motion_val[xy][1] + : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp; + } + break; + case CO_LOCATED_TYPE_FIELDMV: + s->mv_type = MV_TYPE_FIELD; + for(i=0; i<2; i++){ + if(s->top_field_first){ + time_pp= s->pp_field_time - s->field_select_table[mb_index][i] + i; + time_pb= s->pb_field_time - s->field_select_table[mb_index][i] + i; + }else{ + time_pp= s->pp_field_time + s->field_select_table[mb_index][i] - i; + time_pb= s->pb_field_time + s->field_select_table[mb_index][i] - i; + } + s->mv[0][i][0] = s->field_mv_table[mb_index][i][0]*time_pb/time_pp + mx; + s->mv[0][i][1] = s->field_mv_table[mb_index][i][1]*time_pb/time_pp + my; + s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->field_mv_table[mb_index][i][0] + : s->field_mv_table[mb_index][i][0]*(time_pb - time_pp)/time_pp; + s->mv[1][i][1] = my ? 
s->mv[0][i][1] - s->field_mv_table[mb_index][i][1] + : s->field_mv_table[mb_index][i][1]*(time_pb - time_pp)/time_pp; + } + break; + } +} + #ifdef CONFIG_ENCODERS void mpeg4_encode_mb(MpegEncContext * s, DCTELEM block[6][64], @@ -442,7 +489,7 @@ void mpeg4_encode_mb(MpegEncContext * s, return; } - + if ((cbp | motion_x | motion_y | mb_type) ==0) { /* direct MB with MV={0,0} */ assert(s->dquant==0); @@ -1386,7 +1433,7 @@ void h263_encode_init(MpegEncContext *s) init_mv_penalty_and_fcode(s); } - s->mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p + s->me.mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p // use fcodes >1 only for mpeg4 & h263 & h263p FIXME switch(s->codec_id){ @@ -1519,7 +1566,7 @@ void ff_set_mpeg4_time(MpegEncContext * s, int picture_number){ static void mpeg4_encode_vol_header(MpegEncContext * s) { - int vo_ver_id=1; //must be 2 if we want GMC or q-pel + int vo_ver_id=2; //must be 2 if we want GMC or q-pel char buf[255]; if(s->max_b_frames){ @@ -1584,7 +1631,7 @@ static void mpeg4_encode_vol_header(MpegEncContext * s) if(s->mpeg_quant) put_bits(&s->pb, 2, 0); /* no custom matrixes */ if (vo_ver_id != 1) - put_bits(&s->pb, 1, s->quarter_sample=0); + put_bits(&s->pb, 1, s->quarter_sample); put_bits(&s->pb, 1, 1); /* complexity estimation disable */ s->resync_marker= s->rtp_mode; put_bits(&s->pb, 1, s->resync_marker ? 
0 : 1);/* resync marker disable */ @@ -1618,7 +1665,6 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) int time_div, time_mod; if(s->pict_type==I_TYPE){ - s->no_rounding=0; if(picture_number==0 || !s->strict_std_compliance) mpeg4_encode_vol_header(s); } @@ -1645,7 +1691,6 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number) put_bits(&s->pb, 1, 1); /* vop coded */ if ( s->pict_type == P_TYPE || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) { - s->no_rounding ^= 1; put_bits(&s->pb, 1, s->no_rounding); /* rounding type */ } put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */ @@ -1996,6 +2041,61 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n } #endif } + +static inline int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, + UINT8 *scan_table) +{ + int i, last_non_zero; + const RLTable *rl; + UINT8 *len_tab; + const int last_index = s->block_last_index[n]; + int len=0; + + if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away + /* mpeg4 based DC predictor */ + //mpeg4_encode_dc(dc_pb, intra_dc, n); //FIXME + if(last_index<1) return len; + i = 1; + rl = &rl_intra; + len_tab = uni_mpeg4_intra_rl_len; + } else { + if(last_index<0) return 0; + i = 0; + rl = &rl_inter; + len_tab = uni_mpeg4_inter_rl_len; + } + + /* AC coefs */ + last_non_zero = i - 1; + for (; i < last_index; i++) { + int level = block[ scan_table[i] ]; + if (level) { + int run = i - last_non_zero - 1; + level+=64; + if((level&(~127)) == 0){ + const int index= UNI_MPEG4_ENC_INDEX(0, run, level); + len += len_tab[index]; + }else{ //ESC3 + len += 7+2+1+6+1+12+1; + } + last_non_zero = i; + } + } + /*if(i<=last_index)*/{ + int level = block[ scan_table[i] ]; + int run = i - last_non_zero - 1; + level+=64; + if((level&(~127)) == 0){ + const int index= UNI_MPEG4_ENC_INDEX(1, run, level); + len += len_tab[index]; + }else{ //ESC3 + len += 7+2+1+6+1+12+1; + } + } + + 
return len; +} + #endif @@ -3050,8 +3150,6 @@ int ff_h263_decode_mb(MpegEncContext *s, int modb1; // first bit of modb int modb2; // second bit of modb int mb_type; - uint16_t time_pp; - uint16_t time_pb; int xy; s->mb_intra = 0; //B-frames never contain intra blocks @@ -3173,9 +3271,6 @@ int ff_h263_decode_mb(MpegEncContext *s, } if(mb_type==4 || mb_type==MB_TYPE_B_DIRECT){ - int mb_index= s->mb_x + s->mb_y*s->mb_width; - int i; - if(mb_type==4) mx=my=0; else{ @@ -3184,55 +3279,7 @@ int ff_h263_decode_mb(MpegEncContext *s, } s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; - xy= s->block_index[0]; - time_pp= s->pp_time; - time_pb= s->pb_time; - - //FIXME avoid divides - switch(s->co_located_type_table[mb_index]){ - case 0: - s->mv_type= MV_TYPE_16X16; - s->mv[0][0][0] = s->motion_val[xy][0]*time_pb/time_pp + mx; - s->mv[0][0][1] = s->motion_val[xy][1]*time_pb/time_pp + my; - s->mv[1][0][0] = mx ? s->mv[0][0][0] - s->motion_val[xy][0] - : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp; - s->mv[1][0][1] = my ? s->mv[0][0][1] - s->motion_val[xy][1] - : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp; - PRINT_MB_TYPE(mb_type==4 ? "D" : "S"); - break; - case CO_LOCATED_TYPE_4MV: - s->mv_type = MV_TYPE_8X8; - for(i=0; i<4; i++){ - xy= s->block_index[i]; - s->mv[0][i][0] = s->motion_val[xy][0]*time_pb/time_pp + mx; - s->mv[0][i][1] = s->motion_val[xy][1]*time_pb/time_pp + my; - s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->motion_val[xy][0] - : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp; - s->mv[1][i][1] = my ? 
s->mv[0][i][1] - s->motion_val[xy][1] - : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp; - } - PRINT_MB_TYPE("4"); - break; - case CO_LOCATED_TYPE_FIELDMV: - s->mv_type = MV_TYPE_FIELD; - for(i=0; i<2; i++){ - if(s->top_field_first){ - time_pp= s->pp_field_time - s->field_select_table[mb_index][i] + i; - time_pb= s->pb_field_time - s->field_select_table[mb_index][i] + i; - }else{ - time_pp= s->pp_field_time + s->field_select_table[mb_index][i] - i; - time_pb= s->pb_field_time + s->field_select_table[mb_index][i] - i; - } - s->mv[0][i][0] = s->field_mv_table[mb_index][i][0]*time_pb/time_pp + mx; - s->mv[0][i][1] = s->field_mv_table[mb_index][i][1]*time_pb/time_pp + my; - s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->field_mv_table[mb_index][i][0] - : s->field_mv_table[mb_index][i][0]*(time_pb - time_pp)/time_pp; - s->mv[1][i][1] = my ? s->mv[0][i][1] - s->field_mv_table[mb_index][i][1] - : s->field_mv_table[mb_index][i][1]*(time_pb - time_pp)/time_pp; - } - PRINT_MB_TYPE("="); - break; - } + ff_mpeg4_set_direct_mv(s, mx, my); } if(mb_type<0 || mb_type>4){ diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c index e7c49237e9..9a22310c17 100644 --- a/libavcodec/h263dec.c +++ b/libavcodec/h263dec.c @@ -40,7 +40,7 @@ static inline long long rdtsc() } #endif -static int h263_decode_init(AVCodecContext *avctx) +int ff_h263_decode_init(AVCodecContext *avctx) { MpegEncContext *s = avctx->priv_data; @@ -113,7 +113,7 @@ static int h263_decode_init(AVCodecContext *avctx) return 0; } -static int h263_decode_end(AVCodecContext *avctx) +int ff_h263_decode_end(AVCodecContext *avctx) { MpegEncContext *s = avctx->priv_data; @@ -343,7 +343,7 @@ static int mpeg4_find_frame_end(MpegEncContext *s, UINT8 *buf, int buf_size){ return -1; } -static int h263_decode_frame(AVCodecContext *avctx, +int ff_h263_decode_frame(AVCodecContext *avctx, void *data, int *data_size, UINT8 *buf, int buf_size) { @@ -416,9 +416,11 @@ retry: if (MPV_common_init(s) < 0) //we need the idct permutaton for 
reading a custom matrix return -1; } - + /* let's go :-) */ - if (s->h263_msmpeg4) { + if (s->msmpeg4_version==5) { + ret= ff_wmv2_decode_picture_header(s); + } else if (s->msmpeg4_version) { ret = msmpeg4_decode_picture_header(s); } else if (s->h263_pred) { if(s->avctx->extradata_size && s->picture_number==0){ @@ -634,7 +636,6 @@ retry: } if(num_end_markers || error){ fprintf(stderr, "concealing errors\n"); -//printf("type:%d\n", s->pict_type); ff_error_resilience(s); } } @@ -713,10 +714,10 @@ AVCodec mpeg4_decoder = { CODEC_TYPE_VIDEO, CODEC_ID_MPEG4, sizeof(MpegEncContext), - h263_decode_init, + ff_h263_decode_init, NULL, - h263_decode_end, - h263_decode_frame, + ff_h263_decode_end, + ff_h263_decode_frame, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED, }; @@ -725,10 +726,10 @@ AVCodec h263_decoder = { CODEC_TYPE_VIDEO, CODEC_ID_H263, sizeof(MpegEncContext), - h263_decode_init, + ff_h263_decode_init, NULL, - h263_decode_end, - h263_decode_frame, + ff_h263_decode_end, + ff_h263_decode_frame, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, }; @@ -737,10 +738,10 @@ AVCodec msmpeg4v1_decoder = { CODEC_TYPE_VIDEO, CODEC_ID_MSMPEG4V1, sizeof(MpegEncContext), - h263_decode_init, + ff_h263_decode_init, NULL, - h263_decode_end, - h263_decode_frame, + ff_h263_decode_end, + ff_h263_decode_frame, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, }; @@ -749,10 +750,10 @@ AVCodec msmpeg4v2_decoder = { CODEC_TYPE_VIDEO, CODEC_ID_MSMPEG4V2, sizeof(MpegEncContext), - h263_decode_init, + ff_h263_decode_init, NULL, - h263_decode_end, - h263_decode_frame, + ff_h263_decode_end, + ff_h263_decode_frame, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, }; @@ -761,10 +762,10 @@ AVCodec msmpeg4v3_decoder = { CODEC_TYPE_VIDEO, CODEC_ID_MSMPEG4V3, sizeof(MpegEncContext), - h263_decode_init, + ff_h263_decode_init, NULL, - h263_decode_end, - h263_decode_frame, + ff_h263_decode_end, + ff_h263_decode_frame, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, }; @@ -773,22 +774,10 @@ AVCodec wmv1_decoder 
= { CODEC_TYPE_VIDEO, CODEC_ID_WMV1, sizeof(MpegEncContext), - h263_decode_init, - NULL, - h263_decode_end, - h263_decode_frame, - CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, -}; - -AVCodec wmv2_decoder = { - "wmv2", - CODEC_TYPE_VIDEO, - CODEC_ID_WMV2, - sizeof(MpegEncContext), - h263_decode_init, + ff_h263_decode_init, NULL, - h263_decode_end, - h263_decode_frame, + ff_h263_decode_end, + ff_h263_decode_frame, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, }; @@ -797,10 +786,10 @@ AVCodec h263i_decoder = { CODEC_TYPE_VIDEO, CODEC_ID_H263I, sizeof(MpegEncContext), - h263_decode_init, + ff_h263_decode_init, NULL, - h263_decode_end, - h263_decode_frame, + ff_h263_decode_end, + ff_h263_decode_frame, CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, }; diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c index 12a3601546..b9ebc31136 100644 --- a/libavcodec/i386/dsputil_mmx.c +++ b/libavcodec/i386/dsputil_mmx.c @@ -43,6 +43,11 @@ int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); +int sad16x16_mmx(void *s, UINT8 *blk1, UINT8 *blk2, int lx); +int sad8x8_mmx(void *s, UINT8 *blk1, UINT8 *blk2, int lx); +int sad16x16_mmx2(void *s, UINT8 *blk1, UINT8 *blk2, int lx); +int sad8x8_mmx2(void *s, UINT8 *blk1, UINT8 *blk2, int lx); + /* pixel operations */ static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL; static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; @@ -213,7 +218,7 @@ static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size) ); } -static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride) +static inline void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride) { asm volatile( "pxor %%mm7, %%mm7 \n\t" @@ -496,7 +501,150 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ 
for(; i<w; i++) dst[i+0] = src1[i+0]-src2[i+0]; } +#define LBUTTERFLY(a,b)\ + "paddw " #b ", " #a " \n\t"\ + "paddw " #b ", " #b " \n\t"\ + "psubw " #a ", " #b " \n\t" + +#define HADAMARD48\ + LBUTTERFLY(%%mm0, %%mm1)\ + LBUTTERFLY(%%mm2, %%mm3)\ + LBUTTERFLY(%%mm4, %%mm5)\ + LBUTTERFLY(%%mm6, %%mm7)\ + \ + LBUTTERFLY(%%mm0, %%mm2)\ + LBUTTERFLY(%%mm1, %%mm3)\ + LBUTTERFLY(%%mm4, %%mm6)\ + LBUTTERFLY(%%mm5, %%mm7)\ + \ + LBUTTERFLY(%%mm0, %%mm4)\ + LBUTTERFLY(%%mm1, %%mm5)\ + LBUTTERFLY(%%mm2, %%mm6)\ + LBUTTERFLY(%%mm3, %%mm7) + +#define MMABS(a,z)\ + "pxor " #z ", " #z " \n\t"\ + "pcmpgtw " #a ", " #z " \n\t"\ + "pxor " #z ", " #a " \n\t"\ + "psubw " #z ", " #a " \n\t" + +#define MMABS_SUM(a,z, sum)\ + "pxor " #z ", " #z " \n\t"\ + "pcmpgtw " #a ", " #z " \n\t"\ + "pxor " #z ", " #a " \n\t"\ + "psubw " #z ", " #a " \n\t"\ + "paddusw " #a ", " #sum " \n\t" + + +#define SBUTTERFLY(a,b,t,n)\ + "movq " #a ", " #t " \n\t" /* abcd */\ + "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\ + "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\ + +#define TRANSPOSE4(a,b,c,d,t)\ + SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\ + SBUTTERFLY(c,d,b,wd) /* c=imjn b=kolp */\ + SBUTTERFLY(a,c,d,dq) /* a=aeim d=bfjn */\ + SBUTTERFLY(t,b,c,dq) /* t=cgko c=dhlp */ + +#define LOAD4(o, a, b, c, d)\ + "movq "#o"(%1), " #a " \n\t"\ + "movq "#o"+16(%1), " #b " \n\t"\ + "movq "#o"+32(%1), " #c " \n\t"\ + "movq "#o"+48(%1), " #d " \n\t" + +#define STORE4(o, a, b, c, d)\ + "movq "#a", "#o"(%1) \n\t"\ + "movq "#b", "#o"+16(%1) \n\t"\ + "movq "#c", "#o"+32(%1) \n\t"\ + "movq "#d", "#o"+48(%1) \n\t"\ + +static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride){ + uint64_t temp[16] __align8; + int sum=0; + + diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride); + asm volatile( + LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3) + LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7) + + HADAMARD48 + + "movq %%mm7, 112(%1) \n\t" + + TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7) + STORE4(0 , %%mm0, %%mm3, %%mm7, 
%%mm2) + + "movq 112(%1), %%mm7 \n\t" + TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0) + STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6) + + LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3) + LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7) + + HADAMARD48 + + "movq %%mm7, 120(%1) \n\t" + + TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7) + STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2) + + "movq 120(%1), %%mm7 \n\t" + TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0) + "movq %%mm7, %%mm5 \n\t"//FIXME remove + "movq %%mm6, %%mm7 \n\t" + "movq %%mm0, %%mm6 \n\t" +// STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove + + LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3) +// LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7) + + HADAMARD48 + "movq %%mm7, 64(%1) \n\t" + MMABS(%%mm0, %%mm7) + MMABS_SUM(%%mm1, %%mm7, %%mm0) + MMABS_SUM(%%mm2, %%mm7, %%mm0) + MMABS_SUM(%%mm3, %%mm7, %%mm0) + MMABS_SUM(%%mm4, %%mm7, %%mm0) + MMABS_SUM(%%mm5, %%mm7, %%mm0) + MMABS_SUM(%%mm6, %%mm7, %%mm0) + "movq 64(%1), %%mm1 \n\t" + MMABS_SUM(%%mm1, %%mm7, %%mm0) + "movq %%mm0, 64(%1) \n\t" + + LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3) + LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7) + + HADAMARD48 + "movq %%mm7, (%1) \n\t" + MMABS(%%mm0, %%mm7) + MMABS_SUM(%%mm1, %%mm7, %%mm0) + MMABS_SUM(%%mm2, %%mm7, %%mm0) + MMABS_SUM(%%mm3, %%mm7, %%mm0) + MMABS_SUM(%%mm4, %%mm7, %%mm0) + MMABS_SUM(%%mm5, %%mm7, %%mm0) + MMABS_SUM(%%mm6, %%mm7, %%mm0) + "movq (%1), %%mm1 \n\t" + MMABS_SUM(%%mm1, %%mm7, %%mm0) + "movq 64(%1), %%mm1 \n\t" + MMABS_SUM(%%mm1, %%mm7, %%mm0) + + "movq %%mm0, %%mm1 \n\t" + "psrlq $32, %%mm0 \n\t" + "paddusw %%mm1, %%mm0 \n\t" + "movq %%mm0, %%mm1 \n\t" + "psrlq $16, %%mm0 \n\t" + "paddusw %%mm1, %%mm0 \n\t" + "movd %%mm0, %0 \n\t" + + : "=r" (sum) + : "r"(temp) + ); + return sum&0xFFFF; +} + +WARPER88_1616(hadamard8_diff_mmx, hadamard8_diff16_mmx) #if 0 static void just_return() { return; } @@ -579,7 +727,13 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) c->add_bytes= add_bytes_mmx; c->diff_bytes= diff_bytes_mmx; - + + c->hadamard8_diff[0]= 
hadamard8_diff16_mmx; + c->hadamard8_diff[1]= hadamard8_diff_mmx; + + c->sad[0]= sad16x16_mmx; + c->sad[1]= sad8x8_mmx; + if (mm_flags & MM_MMXEXT) { c->pix_abs16x16 = pix_abs16x16_mmx2; c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; @@ -591,6 +745,9 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask) c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2; + c->sad[0]= sad16x16_mmx2; + c->sad[1]= sad8x8_mmx2; + c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; diff --git a/libavcodec/i386/motion_est_mmx.c b/libavcodec/i386/motion_est_mmx.c index 3368e73331..fa85db67b6 100644 --- a/libavcodec/i386/motion_est_mmx.c +++ b/libavcodec/i386/motion_est_mmx.c @@ -274,6 +274,15 @@ int pix_abs8x8_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\ \ return sum_ ## suf();\ }\ +int sad8x8_ ## suf(void *s, UINT8 *blk2, UINT8 *blk1, int stride)\ +{\ + asm volatile("pxor %%mm7, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t":);\ +\ + sad8_ ## suf(blk1, blk2, stride, 3);\ +\ + return sum_ ## suf();\ +}\ \ int pix_abs8x8_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\ {\ @@ -324,6 +333,16 @@ int pix_abs16x16_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\ \ return sum_ ## suf();\ }\ +int sad16x16_ ## suf(void *s, UINT8 *blk2, UINT8 *blk1, int stride)\ +{\ + asm volatile("pxor %%mm7, %%mm7 \n\t"\ + "pxor %%mm6, %%mm6 \n\t":);\ +\ + sad8_ ## suf(blk1 , blk2 , stride, 4);\ + sad8_ ## suf(blk1+8, blk2+8, stride, 4);\ +\ + return sum_ ## suf();\ +}\ int pix_abs16x16_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\ {\ asm volatile("pxor %%mm7, %%mm7 \n\t"\ diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c index 3c688f7e03..ec531c3f3b 100644 --- a/libavcodec/motion_est.c +++ b/libavcodec/motion_est.c @@ -26,8 +26,10 @@ #include "dsputil.h" #include "mpegvideo.h" +//#undef NDEBUG +//#include <assert.h> + #define SQ(a) ((a)*(a)) -#define INTER_BIAS 257 #define 
P_LAST P[0] #define P_LEFT P[1] @@ -40,7 +42,295 @@ #define P_LAST_BOTTOM P[8] #define P_MV1 P[9] +static inline int sad_hpel_motion_search(MpegEncContext * s, + int *mx_ptr, int *my_ptr, int dmin, + int xmin, int ymin, int xmax, int ymax, + int pred_x, int pred_y, Picture *picture, + int n, int size, uint16_t * const mv_penalty); + +static inline int update_map_generation(MpegEncContext * s) +{ + s->me.map_generation+= 1<<(ME_MAP_MV_BITS*2); + if(s->me.map_generation==0){ + s->me.map_generation= 1<<(ME_MAP_MV_BITS*2); + memset(s->me.map, 0, sizeof(uint32_t)*ME_MAP_SIZE); + } + return s->me.map_generation; +} + + + +/* SIMPLE */ +#define RENAME(a) simple_ ## a + +#define CMP(d, x, y, size)\ +d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride); + +#define CMP_HPEL(d, dx, dy, x, y, size)\ +{\ + const int dxy= (dx) + 2*(dy);\ + hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\ + d = cmp_sub(s, s->me.scratchpad, src_y, stride);\ +} + +#define CMP_QPEL(d, dx, dy, x, y, size)\ +{\ + const int dxy= (dx) + 4*(dy);\ + qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\ + d = cmp_sub(s, s->me.scratchpad, src_y, stride);\ +} + +#include "motion_est_template.c" +#undef RENAME +#undef CMP +#undef CMP_HPEL +#undef CMP_QPEL +#undef INIT + +/* SIMPLE CHROMA */ +#define RENAME(a) simple_chroma_ ## a + +#define CMP(d, x, y, size)\ +d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);\ +if(chroma_cmp){\ + int dxy= ((x)&1) + 2*((y)&1);\ + int c= ((x)>>1) + ((y)>>1)*uvstride;\ +\ + chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, 8);\ + d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride);\ + chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, 8);\ + d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride);\ +} + +#define CMP_HPEL(d, dx, dy, x, y, size)\ +{\ + const int dxy= (dx) + 2*(dy);\ + hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\ + d = cmp_sub(s, 
s->me.scratchpad, src_y, stride);\ + if(chroma_cmp_sub){\ + int cxy= (dxy) | ((x)&1) | (2*((y)&1));\ + int c= ((x)>>1) + ((y)>>1)*uvstride;\ + chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\ + d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\ + chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\ + d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\ + }\ +} + +#define CMP_QPEL(d, dx, dy, x, y, size)\ +{\ + const int dxy= (dx) + 4*(dy);\ + qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\ + d = cmp_sub(s, s->me.scratchpad, src_y, stride);\ + if(chroma_cmp_sub){\ + int cxy, c;\ + int cx= (4*(x) + (dx))/2;\ + int cy= (4*(y) + (dy))/2;\ + cx= (cx>>1)|(cx&1);\ + cy= (cy>>1)|(cy&1);\ + cxy= (cx&1) + 2*(cy&1);\ + c= ((cx)>>1) + ((cy)>>1)*uvstride;\ + chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\ + d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\ + chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\ + d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\ + }\ +} + +#include "motion_est_template.c" +#undef RENAME +#undef CMP +#undef CMP_HPEL +#undef CMP_QPEL +#undef INIT + +/* SIMPLE DIRECT HPEL */ +#define RENAME(a) simple_direct_hpel_ ## a +//FIXME precalc divisions stuff + +#define CMP_DIRECT(d, dx, dy, x, y, size, cmp_func)\ +if((x) >= xmin && 2*(x) + (dx) <= 2*xmax && (y) >= ymin && 2*(y) + (dy) <= 2*ymax){\ + const int hx= 2*(x) + (dx);\ + const int hy= 2*(y) + (dy);\ + if(s->mv_type==MV_TYPE_8X8){\ + int i;\ + for(i=0; i<4; i++){\ + int fx = s->me.direct_basis_mv[i][0] + hx;\ + int fy = s->me.direct_basis_mv[i][1] + hy;\ + int bx = hx ? fx - s->me.co_located_mv[i][0] : s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + (i &1)*16;\ + int by = hy ? 
fy - s->me.co_located_mv[i][1] : s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + (i>>1)*16;\ + int fxy= (fx&1) + 2*(fy&1);\ + int bxy= (bx&1) + 2*(by&1);\ +\ + uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\ + hpel_put[1][fxy](dst, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 8);\ + hpel_avg[1][bxy](dst, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 8);\ + }\ + }else{\ + int fx = s->me.direct_basis_mv[0][0] + hx;\ + int fy = s->me.direct_basis_mv[0][1] + hy;\ + int bx = hx ? fx - s->me.co_located_mv[0][0] : s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp;\ + int by = hy ? fy - s->me.co_located_mv[0][1] : s->me.co_located_mv[0][1]*(time_pb - time_pp)/time_pp;\ + int fxy= (fx&1) + 2*(fy&1);\ + int bxy= (bx&1) + 2*(by&1);\ +\ + hpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 16);\ + hpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 16);\ + }\ + d = cmp_func(s, s->me.scratchpad, src_y, stride);\ +}else\ + d= 256*256*256*32; + + +#define CMP_HPEL(d, dx, dy, x, y, size)\ + CMP_DIRECT(d, dx, dy, x, y, size, cmp_sub) + +#define CMP(d, x, y, size)\ + CMP_DIRECT(d, 0, 0, x, y, size, cmp) + +#include "motion_est_template.c" +#undef RENAME +#undef CMP +#undef CMP_HPEL +#undef CMP_QPEL +#undef INIT +#undef CMP_DIRECT + +/* SIMPLE DIRECT QPEL */ +#define RENAME(a) simple_direct_qpel_ ## a + +#define CMP_DIRECT(d, dx, dy, x, y, size, cmp_func)\ +if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*ymax){\ + const int qx= 4*(x) + (dx);\ + const int qy= 4*(y) + (dy);\ + if(s->mv_type==MV_TYPE_8X8){\ + int i;\ + for(i=0; i<4; i++){\ + int fx = s->me.direct_basis_mv[i][0] + qx;\ + int fy = s->me.direct_basis_mv[i][1] + qy;\ + int bx = qx ? fx - s->me.co_located_mv[i][0] : s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + (i &1)*16;\ + int by = qy ? 
fy - s->me.co_located_mv[i][1] : s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + (i>>1)*16;\ + int fxy= (fx&3) + 4*(fy&3);\ + int bxy= (bx&3) + 4*(by&3);\ +\ + uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\ + qpel_put[1][fxy](dst, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\ + qpel_avg[1][bxy](dst, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\ + }\ + }else{\ + int fx = s->me.direct_basis_mv[0][0] + qx;\ + int fy = s->me.direct_basis_mv[0][1] + qy;\ + int bx = qx ? fx - s->me.co_located_mv[0][0] : s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp;\ + int by = qy ? fy - s->me.co_located_mv[0][1] : s->me.co_located_mv[0][1]*(time_pb - time_pp)/time_pp;\ + int fxy= (fx&3) + 4*(fy&3);\ + int bxy= (bx&3) + 4*(by&3);\ +\ + qpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\ + qpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\ + }\ + d = cmp_func(s, s->me.scratchpad, src_y, stride);\ +}else\ + d= 256*256*256*32; + + +#define CMP_QPEL(d, dx, dy, x, y, size)\ + CMP_DIRECT(d, dx, dy, x, y, size, cmp_sub) + +#define CMP(d, x, y, size)\ + CMP_DIRECT(d, 0, 0, x, y, size, cmp) + +#include "motion_est_template.c" +#undef RENAME +#undef CMP +#undef CMP_HPEL +#undef CMP_QPEL +#undef INIT +#undef CMP__DIRECT + + +static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride){ + return 0; +} + +static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){ + DSPContext* c= &s->dsp; + int i; + + memset(cmp, 0, sizeof(void*)*11); + + switch(type&0xFF){ + case FF_CMP_SAD: + cmp[0]= c->sad[0]; + cmp[1]= c->sad[1]; + break; + case FF_CMP_SATD: + cmp[0]= c->hadamard8_diff[0]; + cmp[1]= c->hadamard8_diff[1]; + break; + case FF_CMP_SSE: + cmp[0]= c->sse[0]; + cmp[1]= c->sse[1]; + break; + case FF_CMP_DCT: + cmp[0]= c->dct_sad[0]; + cmp[1]= c->dct_sad[1]; + break; + case FF_CMP_PSNR: + cmp[0]= c->quant_psnr[0]; + cmp[1]= c->quant_psnr[1]; + break; + case FF_CMP_ZERO: + for(i=0; i<7; i++){ + 
cmp[i]= zero_cmp; + } + break; + default: + fprintf(stderr,"internal error in cmp function selection\n"); + } +}; + +static inline int get_penalty_factor(MpegEncContext *s, int type){ + + switch(type){ + default: + case FF_CMP_SAD: + return s->qscale; + case FF_CMP_SSE: +// return s->qscale*8; + case FF_CMP_DCT: + case FF_CMP_SATD: + return s->qscale*8; + } +} + +void ff_init_me(MpegEncContext *s){ + set_cmp(s, s->dsp.me_cmp, s->avctx->me_cmp); + set_cmp(s, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp); + set_cmp(s, s->dsp.mb_cmp, s->avctx->mb_cmp); + if(s->flags&CODEC_FLAG_QPEL){ + if(s->avctx->me_sub_cmp&FF_CMP_CHROMA) + s->me.sub_motion_search= simple_chroma_qpel_motion_search; + else + s->me.sub_motion_search= simple_qpel_motion_search; + }else{ + if(s->avctx->me_sub_cmp&FF_CMP_CHROMA) + s->me.sub_motion_search= simple_chroma_hpel_motion_search; + else if(s->avctx->me_sub_cmp == FF_CMP_SAD && s->avctx->me_cmp == FF_CMP_SAD) + s->me.sub_motion_search= sad_hpel_motion_search; + else + s->me.sub_motion_search= simple_hpel_motion_search; + } + + if(s->avctx->me_cmp&FF_CMP_CHROMA){ + s->me.motion_search[0]= simple_chroma_epzs_motion_search; + s->me.motion_search[1]= simple_chroma_epzs_motion_search4; + }else{ + s->me.motion_search[0]= simple_epzs_motion_search; + s->me.motion_search[1]= simple_epzs_motion_search4; + } +} + static int pix_dev(UINT8 * pix, int line_size, int mean) { int s, i, j; @@ -294,492 +584,39 @@ static int phods_motion_search(MpegEncContext * s, #define Z_THRESHOLD 256 -#define CHECK_MV(x,y)\ -{\ - const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\ - const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ - if(map[index]!=key){\ - d = s->dsp.pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ - d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\ - COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\ - map[index]= key;\ - score_map[index]= d;\ - }\ -} - -#define CHECK_MV_DIR(x,y,new_dir)\ -{\ - 
const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\ - const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ - if(map[index]!=key){\ - d = pix_abs(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ - d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\ - if(d<dmin){\ - best[0]=x;\ - best[1]=y;\ - dmin=d;\ - next_dir= new_dir;\ - }\ - map[index]= key;\ - score_map[index]= d;\ - }\ -} - -#define CHECK_MV4(x,y)\ -{\ - const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\ - const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ - if(map[index]!=key){\ - d = s->dsp.pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ - d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\ - COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\ - map[index]= key;\ - score_map[index]= d;\ - }\ -} - -#define check(x,y,S,v)\ -if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\ -if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\ -if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\ -if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\ - - -static inline int small_diamond_search(MpegEncContext * s, int *best, int dmin, - UINT8 *new_pic, UINT8 *old_pic, int pic_stride, - int pred_x, int pred_y, UINT16 *mv_penalty, int quant, - int xmin, int ymin, int xmax, int ymax, int shift, - uint32_t *map, uint16_t *score_map, int map_generation, - op_pixels_abs_func pix_abs) -{ - int next_dir=-1; - - for(;;){ - int d; - const int dir= next_dir; - const int x= best[0]; - const int y= best[1]; - next_dir=-1; - -//printf("%d", dir); - if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0) - if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1) - if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2) - if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3) - - if(next_dir==-1){ - return dmin; - 
} - } - -/* for(;;){ - int d; - const int x= best[0]; - const int y= best[1]; - const int last_min=dmin; - if(x>xmin) CHECK_MV(x-1, y ) - if(y>xmin) CHECK_MV(x , y-1) - if(x<xmax) CHECK_MV(x+1, y ) - if(y<xmax) CHECK_MV(x , y+1) - if(x>xmin && y>ymin) CHECK_MV(x-1, y-1) - if(x>xmin && y<ymax) CHECK_MV(x-1, y+1) - if(x<xmax && y>ymin) CHECK_MV(x+1, y-1) - if(x<xmax && y<ymax) CHECK_MV(x+1, y+1) - if(x-1>xmin) CHECK_MV(x-2, y ) - if(y-1>xmin) CHECK_MV(x , y-2) - if(x+1<xmax) CHECK_MV(x+2, y ) - if(y+1<xmax) CHECK_MV(x , y+2) - if(x-1>xmin && y-1>ymin) CHECK_MV(x-2, y-2) - if(x-1>xmin && y+1<ymax) CHECK_MV(x-2, y+2) - if(x+1<xmax && y-1>ymin) CHECK_MV(x+2, y-2) - if(x+1<xmax && y+1<ymax) CHECK_MV(x+2, y+2) - if(dmin==last_min) return dmin; - } - */ -} - -#if 1 -#define SNAKE_1 3 -#define SNAKE_2 2 -#else -#define SNAKE_1 7 -#define SNAKE_2 3 -#endif -static inline int snake_search(MpegEncContext * s, int *best, int dmin, - UINT8 *new_pic, UINT8 *old_pic, int pic_stride, - int pred_x, int pred_y, UINT16 *mv_penalty, int quant, - int xmin, int ymin, int xmax, int ymax, int shift, - uint32_t *map, uint16_t *score_map,int map_generation, - op_pixels_abs_func pix_abs) -{ - int dir=0; - int c=1; - static int x_dir[8]= {1,1,0,-1,-1,-1, 0, 1}; - static int y_dir[8]= {0,1,1, 1, 0,-1,-1,-1}; - int fails=0; - int last_d[2]={dmin, dmin}; - -/*static int good=0; -static int bad=0; -static int point=0; - -point++; -if(256*256*256*64%point==0) -{ - printf("%d %d %d\n", good, bad, point); -}*/ - - for(;;){ - int x= best[0]; - int y= best[1]; - int d; - x+=x_dir[dir]; - y+=y_dir[dir]; - if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){ - const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation; - const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1); - if(map[index]!=key){ - d = pix_abs(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride); - d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant; - map[index]=key; - score_map[index]=d; - }else - d= 
dmin+1; - }else{ - d = dmin + 10000; //FIXME smarter boundary handling - } - if(d<dmin){ - best[0]=x; - best[1]=y; - dmin=d; - - if(last_d[1] - last_d[0] > last_d[0] - d) c= -c; - dir+=c; - - fails=0; -//good++; - last_d[1]=last_d[0]; - last_d[0]=d; - }else{ -//bad++; - if(fails){ - if(fails>=SNAKE_1+1) return dmin; - }else{ - if(dir&1) dir-= c*3; - else c= -c; -// c= -c; - } - dir+=c*SNAKE_2; - fails++; - } - dir&=7; - } -} - -static inline int cross_search(MpegEncContext * s, int *best, int dmin, - UINT8 *new_pic, UINT8 *old_pic, int pic_stride, - int pred_x, int pred_y, UINT16 *mv_penalty, int quant, - int xmin, int ymin, int xmax, int ymax, int shift, - uint32_t *map, uint16_t *score_map,int map_generation, - op_pixels_abs_func pix_abs) -{ - static int x_dir[4]= {-1, 0, 1, 0}; - static int y_dir[4]= { 0,-1, 0, 1}; - int improvement[2]={100000, 100000}; - int dirs[2]={2, 3}; - int dir; - int last_dir= -1; - - for(;;){ - dir= dirs[ improvement[0] > improvement[1] ? 0 : 1 ]; - if(improvement[dir&1]==-1) return dmin; - - { - const int x= best[0] + x_dir[dir]; - const int y= best[1] + y_dir[dir]; - const int key= (y<<ME_MAP_MV_BITS) + x + map_generation; - const int index= ((y<<ME_MAP_SHIFT) + x)&(ME_MAP_SIZE-1); - int d; - if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){ - if(map[index]!=key){ - d = pix_abs(new_pic, old_pic + x + y*pic_stride, pic_stride); - d += (mv_penalty[(x<<shift)-pred_x] + mv_penalty[(y<<shift)-pred_y])*quant; - map[index]=key; - score_map[index]=d; - if(d<dmin){ - improvement[dir&1]= dmin-d; - improvement[(dir&1)^1]++; - dmin=d; - best[0]= x; - best[1]= y; - last_dir=dir; - continue; - } - }else{ - d= score_map[index]; - } - }else{ - d= dmin + 1000; //FIXME is this a good idea? 
- } - /* evaluated point was cached or checked and worse */ - - if(last_dir==dir){ - improvement[dir&1]= -1; - }else{ - improvement[dir&1]= d-dmin; - last_dir= dirs[dir&1]= dir^2; - } - } - } -} - -static inline int update_map_generation(MpegEncContext * s) -{ - s->me_map_generation+= 1<<(ME_MAP_MV_BITS*2); - if(s->me_map_generation==0){ - s->me_map_generation= 1<<(ME_MAP_MV_BITS*2); - memset(s->me_map, 0, sizeof(uint32_t)*ME_MAP_SIZE); - } - return s->me_map_generation; -} - -static int epzs_motion_search(MpegEncContext * s, - int *mx_ptr, int *my_ptr, - int P[10][2], int pred_x, int pred_y, - int xmin, int ymin, int xmax, int ymax, uint8_t * ref_picture) -{ - int best[2]={0, 0}; - int d, dmin; - UINT8 *new_pic, *old_pic; - const int pic_stride= s->linesize; - const int pic_xy= (s->mb_y*pic_stride + s->mb_x)*16; - UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame - int quant= s->qscale; // qscale of the prev frame - const int shift= 1+s->quarter_sample; - uint32_t *map= s->me_map; - uint16_t *score_map= s->me_score_map; - int map_generation; - - new_pic = s->new_picture.data[0] + pic_xy; - old_pic = ref_picture + pic_xy; - - map_generation= update_map_generation(s); - - dmin = s->dsp.pix_abs16x16(new_pic, old_pic, pic_stride); - map[0]= map_generation; - score_map[0]= dmin; - - /* first line */ - if ((s->mb_y == 0 || s->first_slice_line)) { - CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) - CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift) - }else{ - if(dmin<256 && ( P_LEFT[0] |P_LEFT[1] - |P_TOP[0] |P_TOP[1] - |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){ - *mx_ptr= 0; - *my_ptr= 0; - s->skip_me=1; - return dmin; - } - CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift) - if(dmin>256*2){ - CHECK_MV(P_LAST[0] >>shift, P_LAST[1] >>shift) - CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift) - CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift) - CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) - } - } - if(dmin>256*4){ - CHECK_MV(P_LAST_RIGHT[0] >>shift, 
P_LAST_RIGHT[1] >>shift) - CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift) - } -#if 0 //doest only slow things down - if(dmin>512*3){ - int step; - dmin= score_map[0]; - best[0]= best[1]=0; - for(step=128; step>0; step>>=1){ - const int step2= step; - int y; - for(y=-step2+best[1]; y<=step2+best[1]; y+=step){ - int x; - if(y<ymin || y>ymax) continue; - - for(x=-step2+best[0]; x<=step2+best[0]; x+=step){ - if(x<xmin || x>xmax) continue; - if(x==best[0] && y==best[1]) continue; - CHECK_MV(x,y) - } - } - } - } -#endif -//check(best[0],best[1],0, b0) - if(s->me_method==ME_EPZS) - dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, - pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, - shift, map, score_map, map_generation, s->dsp.pix_abs16x16); - else - dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride, - pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, - shift, map, score_map, map_generation, s->dsp.pix_abs16x16); -//check(best[0],best[1],0, b1) - *mx_ptr= best[0]; - *my_ptr= best[1]; - -// printf("%d %d %d \n", best[0], best[1], dmin); - return dmin; -} - -static int epzs_motion_search4(MpegEncContext * s, int block, - int *mx_ptr, int *my_ptr, - int P[10][2], int pred_x, int pred_y, - int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture) -{ - int best[2]={0, 0}; - int d, dmin; - UINT8 *new_pic, *old_pic; - const int pic_stride= s->linesize; - const int pic_xy= ((s->mb_y*2 + (block>>1))*pic_stride + s->mb_x*2 + (block&1))*8; - UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame - int quant= s->qscale; // qscale of the prev frame - const int shift= 1+s->quarter_sample; - uint32_t *map= s->me_map; - uint16_t *score_map= s->me_score_map; - int map_generation; - - new_pic = s->new_picture.data[0] + pic_xy; - old_pic = ref_picture + pic_xy; - - map_generation= update_map_generation(s); - - dmin = 1000000; -//printf("%d %d %d %d //",xmin, ymin, xmax, ymax); - /* first line 
*/ - if ((s->mb_y == 0 || s->first_slice_line) && block<2) { - CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift) - CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift) - CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift) - }else{ - CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift) - //FIXME try some early stop - if(dmin>64*2){ - CHECK_MV4(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift) - CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift) - CHECK_MV4(P_TOP[0]>>shift, P_TOP[1]>>shift) - CHECK_MV4(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) - CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift) - } - } - if(dmin>64*4){ - CHECK_MV4(P_LAST_RIGHT[0]>>shift, P_LAST_RIGHT[1]>>shift) - CHECK_MV4(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift) - } - - if(s->me_method==ME_EPZS) - dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, - pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, - shift, map, score_map, map_generation, s->dsp.pix_abs8x8); - else - dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride, - pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, - shift, map, score_map, map_generation, s->dsp.pix_abs8x8); - - *mx_ptr= best[0]; - *my_ptr= best[1]; - -// printf("%d %d %d \n", best[0], best[1], dmin); - return dmin; -} - -#define CHECK_HALF_MV(suffix, x, y) \ +#define CHECK_SAD_HALF_MV(suffix, x, y) \ {\ d= pix_abs_ ## suffix(pix, ptr+((x)>>1), s->linesize);\ - d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\ + d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*penalty_factor;\ COPY3_IF_LT(dminh, d, dx, x, dy, y)\ } - -/* The idea would be to make half pel ME after Inter/Intra decision to - save time. 
*/ -static inline int halfpel_motion_search(MpegEncContext * s, +static inline int sad_hpel_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr, int dmin, int xmin, int ymin, int xmax, int ymax, - int pred_x, int pred_y, uint8_t *ref_picture, - op_pixels_abs_func pix_abs_x2, - op_pixels_abs_func pix_abs_y2, op_pixels_abs_func pix_abs_xy2, int n) + int pred_x, int pred_y, Picture *picture, + int n, int size, uint16_t * const mv_penalty) { - UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame - const int quant= s->qscale; + uint8_t *ref_picture= picture->data[0]; + uint32_t *score_map= s->me.score_map; + const int penalty_factor= s->me.sub_penalty_factor; int mx, my, xx, yy, dminh; UINT8 *pix, *ptr; - - if(s->skip_me){ - *mx_ptr = 0; - *my_ptr = 0; - return dmin; - } - - xx = 16 * s->mb_x + 8*(n&1); - yy = 16 * s->mb_y + 8*(n>>1); - pix = s->new_picture.data[0] + (yy * s->linesize) + xx; - - mx = *mx_ptr; - my = *my_ptr; - ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx); + op_pixels_abs_func pix_abs_x2; + op_pixels_abs_func pix_abs_y2; + op_pixels_abs_func pix_abs_xy2; - dminh = dmin; - - if (mx > xmin && mx < xmax && - my > ymin && my < ymax) { - int dx=0, dy=0; - int d, pen_x, pen_y; - - mx<<=1; - my<<=1; - - pen_x= pred_x + mx; - pen_y= pred_y + my; - - ptr-= s->linesize; - CHECK_HALF_MV(xy2, -1, -1) - CHECK_HALF_MV(y2 , 0, -1) - CHECK_HALF_MV(xy2, +1, -1) - - ptr+= s->linesize; - CHECK_HALF_MV(x2 , -1, 0) - CHECK_HALF_MV(x2 , +1, 0) - CHECK_HALF_MV(xy2, -1, +1) - CHECK_HALF_MV(y2 , 0, +1) - CHECK_HALF_MV(xy2, +1, +1) - - mx+=dx; - my+=dy; + if(size==0){ + pix_abs_x2 = s->dsp.pix_abs16x16_x2; + pix_abs_y2 = s->dsp.pix_abs16x16_y2; + pix_abs_xy2= s->dsp.pix_abs16x16_xy2; }else{ - mx<<=1; - my<<=1; + pix_abs_x2 = s->dsp.pix_abs8x8_x2; + pix_abs_y2 = s->dsp.pix_abs8x8_y2; + pix_abs_xy2= s->dsp.pix_abs8x8_xy2; } - *mx_ptr = mx; - *my_ptr = my; - return dminh; -} - -static inline int 
fast_halfpel_motion_search(MpegEncContext * s, - int *mx_ptr, int *my_ptr, int dmin, - int xmin, int ymin, int xmax, int ymax, - int pred_x, int pred_y, uint8_t *ref_picture, - op_pixels_abs_func pix_abs_x2, - op_pixels_abs_func pix_abs_y2, op_pixels_abs_func pix_abs_xy2, int n) -{ - UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame - uint16_t *score_map= s->me_score_map; - const int quant= s->qscale; - int mx, my, xx, yy, dminh; - UINT8 *pix, *ptr; - - if(s->skip_me){ + if(s->me.skip){ // printf("S"); *mx_ptr = 0; *my_ptr = 0; @@ -815,51 +652,51 @@ static inline int fast_halfpel_motion_search(MpegEncContext * s, ptr-= s->linesize; if(t<=b){ - CHECK_HALF_MV(y2 , 0, -1) + CHECK_SAD_HALF_MV(y2 , 0, -1) if(l<=r){ - CHECK_HALF_MV(xy2, -1, -1) + CHECK_SAD_HALF_MV(xy2, -1, -1) if(t+r<=b+l){ - CHECK_HALF_MV(xy2, +1, -1) + CHECK_SAD_HALF_MV(xy2, +1, -1) ptr+= s->linesize; }else{ ptr+= s->linesize; - CHECK_HALF_MV(xy2, -1, +1) + CHECK_SAD_HALF_MV(xy2, -1, +1) } - CHECK_HALF_MV(x2 , -1, 0) + CHECK_SAD_HALF_MV(x2 , -1, 0) }else{ - CHECK_HALF_MV(xy2, +1, -1) + CHECK_SAD_HALF_MV(xy2, +1, -1) if(t+l<=b+r){ - CHECK_HALF_MV(xy2, -1, -1) + CHECK_SAD_HALF_MV(xy2, -1, -1) ptr+= s->linesize; }else{ ptr+= s->linesize; - CHECK_HALF_MV(xy2, +1, +1) + CHECK_SAD_HALF_MV(xy2, +1, +1) } - CHECK_HALF_MV(x2 , +1, 0) + CHECK_SAD_HALF_MV(x2 , +1, 0) } }else{ if(l<=r){ if(t+l<=b+r){ - CHECK_HALF_MV(xy2, -1, -1) + CHECK_SAD_HALF_MV(xy2, -1, -1) ptr+= s->linesize; }else{ ptr+= s->linesize; - CHECK_HALF_MV(xy2, +1, +1) + CHECK_SAD_HALF_MV(xy2, +1, +1) } - CHECK_HALF_MV(x2 , -1, 0) - CHECK_HALF_MV(xy2, -1, +1) + CHECK_SAD_HALF_MV(x2 , -1, 0) + CHECK_SAD_HALF_MV(xy2, -1, +1) }else{ if(t+r<=b+l){ - CHECK_HALF_MV(xy2, +1, -1) + CHECK_SAD_HALF_MV(xy2, +1, -1) ptr+= s->linesize; }else{ ptr+= s->linesize; - CHECK_HALF_MV(xy2, -1, +1) + CHECK_SAD_HALF_MV(xy2, -1, +1) } - CHECK_HALF_MV(x2 , +1, 0) - CHECK_HALF_MV(xy2, +1, +1) + CHECK_SAD_HALF_MV(x2 , +1, 0) + 
CHECK_SAD_HALF_MV(xy2, +1, +1) } - CHECK_HALF_MV(y2 , 0, +1) + CHECK_SAD_HALF_MV(y2 , 0, +1) } mx+=dx; my+=dy; @@ -933,6 +770,7 @@ static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, in int P[10][2]; uint8_t *ref_picture= s->last_picture.data[0]; int dmin_sum=0; + uint16_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; for(block=0; block<4; block++){ int mx4, my4; @@ -995,11 +833,11 @@ static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, in P_MV1[0]= mx; P_MV1[1]= my; - dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, ref_picture); + dmin4 = s->me.motion_search[1](s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, + &s->last_picture, mv_penalty); - dmin4= fast_halfpel_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, - pred_x4, pred_y4, ref_picture, s->dsp.pix_abs8x8_x2, - s->dsp.pix_abs8x8_y2, s->dsp.pix_abs8x8_xy2, block); + dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, + pred_x4, pred_y4, &s->last_picture, block, 1, mv_penalty); s->motion_val[ s->block_index[block] ][0]= mx4; s->motion_val[ s->block_index[block] ][1]= my4; @@ -1021,13 +859,19 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, int mb_type=0; uint8_t *ref_picture= s->last_picture.data[0]; Picture * const pic= &s->current_picture; + uint16_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; + + assert(s->quarter_sample==0 || s->quarter_sample==1); + + s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp); + s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp); get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, s->f_code); rel_xmin= xmin - mb_x*16; rel_xmax= xmax - mb_x*16; rel_ymin= ymin - mb_y*16; rel_ymax= ymax - mb_y*16; - s->skip_me=0; + s->me.skip=0; switch(s->me_method) { case ME_ZERO: @@ -1096,7 +940,8 @@ void 
ff_estimate_p_frame_motion(MpegEncContext * s, } } } - dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture); + dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, + &s->last_picture, mv_penalty); break; } @@ -1112,8 +957,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, sum = s->dsp.pix_sum(pix, s->linesize); varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; - // FIXME: MMX OPTIMIZE - vard = (s->dsp.pix_norm(pix, ppix, s->linesize)+128)>>8; + vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize)+128)>>8; //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); pic->mb_var [s->mb_width * mb_y + mb_x] = varc; @@ -1137,20 +981,14 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, mb_type|= MB_TYPE_INTRA; if (varc*2 + 200 > vard){ mb_type|= MB_TYPE_INTER; - if(s->me_method >= ME_EPZS) - fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, - s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0); - else - halfpel_motion_search( s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, - s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0); + s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, + pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty); }else{ - mx <<=1; - my <<=1; + mx <<=shift; + my <<=shift; } if((s->flags&CODEC_FLAG_4MV) - && !s->skip_me && varc>50 && vard>10){ + && !s->me.skip && varc>50 && vard>10){ mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); mb_type|=MB_TYPE_INTER4V; @@ -1159,19 +997,14 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, set_p_mv_tables(s, mx, my, 1); }else{ if (vard <= 64 || vard < varc) { +// if (sadP <= 32 || sadP < sadI + 
500) { s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); mb_type|= MB_TYPE_INTER; if (s->me_method != ME_ZERO) { - if(s->me_method >= ME_EPZS) - dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2, - s->dsp.pix_abs16x16_xy2, 0); - else - dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2, - s->dsp.pix_abs16x16_xy2, 0); + dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, + pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty); if((s->flags&CODEC_FLAG_4MV) - && !s->skip_me && varc>50 && vard>10){ + && !s->me.skip && varc>50 && vard>10){ int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); if(dmin4 + 128 <dmin) mb_type= MB_TYPE_INTER4V; @@ -1179,8 +1012,8 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V); } else { - mx <<=1; - my <<=1; + mx <<=shift; + my <<=shift; } #if 0 if (vard < 10) { @@ -1201,7 +1034,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, } int ff_estimate_motion_b(MpegEncContext * s, - int mb_x, int mb_y, int16_t (*mv_table)[2], uint8_t *ref_picture, int f_code) + int mb_x, int mb_y, int16_t (*mv_table)[2], Picture *picture, int f_code) { int mx, my, range, dmin; int xmin, ymin, xmax, ymax; @@ -1211,7 +1044,12 @@ int ff_estimate_motion_b(MpegEncContext * s, const int shift= 1+s->quarter_sample; const int mot_stride = s->mb_width + 2; const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1; - + uint8_t * const ref_picture= picture->data[0]; + uint16_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV; + + s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp); + s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp); + get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, 
f_code); rel_xmin= xmin - mb_x*16; rel_xmax= xmax - mb_x*16; @@ -1275,22 +1113,22 @@ int ff_estimate_motion_b(MpegEncContext * s, pred_x= P_LEFT[0]; pred_y= P_LEFT[1]; } - dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture); + dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, + picture, mv_penalty); break; } - dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2, - s->dsp.pix_abs16x16_xy2, 0); + dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, + pred_x, pred_y, picture, 0, 0, mv_penalty); //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my); // s->mb_type[mb_y*s->mb_width + mb_x]= mb_type; mv_table[mot_xy][0]= mx; mv_table[mot_xy][1]= my; + return dmin; } - static inline int check_bidir_mv(MpegEncContext * s, int mb_x, int mb_y, int motion_fx, int motion_fy, @@ -1299,45 +1137,57 @@ static inline int check_bidir_mv(MpegEncContext * s, int pred_bx, int pred_by) { //FIXME optimize? - //FIXME direct mode penalty - UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame - uint8_t *dest_y = s->me_scratchpad; + //FIXME move into template? 
+ //FIXME better f_code prediction (max mv & distance) + UINT16 *mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame + uint8_t *dest_y = s->me.scratchpad; uint8_t *ptr; int dxy; int src_x, src_y; int fbmin; - fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->qscale; - - dxy = ((motion_fy & 1) << 1) | (motion_fx & 1); - src_x = mb_x * 16 + (motion_fx >> 1); - src_y = mb_y * 16 + (motion_fy >> 1); - src_x = clip(src_x, -16, s->width); - if (src_x == s->width) - dxy&= 2; - src_y = clip(src_y, -16, s->height); - if (src_y == s->height) - dxy&= 1; - - ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x; - s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); - - fbmin += (mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->qscale; - - dxy = ((motion_by & 1) << 1) | (motion_bx & 1); - src_x = mb_x * 16 + (motion_bx >> 1); - src_y = mb_y * 16 + (motion_by >> 1); - src_x = clip(src_x, -16, s->width); - if (src_x == s->width) - dxy&= 2; - src_y = clip(src_y, -16, s->height); - if (src_y == s->height) - dxy&= 1; - - ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x; - s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); - - fbmin += s->dsp.pix_abs16x16(s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize); + if(s->quarter_sample){ + dxy = ((motion_fy & 3) << 2) | (motion_fx & 3); + src_x = mb_x * 16 + (motion_fx >> 2); + src_y = mb_y * 16 + (motion_fy >> 2); + assert(src_x >=-16 && src_x<=s->width); + assert(src_y >=-16 && src_y<=s->height); + + ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x; + s->dsp.put_qpel_pixels_tab[0][dxy](dest_y , ptr , s->linesize); + + dxy = ((motion_by & 3) << 2) | (motion_bx & 3); + src_x = mb_x * 16 + (motion_bx >> 2); + src_y = mb_y * 16 + (motion_by >> 2); + assert(src_x >=-16 && src_x<=s->width); + assert(src_y >=-16 && src_y<=s->height); + + ptr = s->next_picture.data[0] + (src_y * s->linesize) + 
src_x; + s->dsp.avg_qpel_pixels_tab[0][dxy](dest_y , ptr , s->linesize); + }else{ + dxy = ((motion_fy & 1) << 1) | (motion_fx & 1); + src_x = mb_x * 16 + (motion_fx >> 1); + src_y = mb_y * 16 + (motion_fy >> 1); + assert(src_x >=-16 && src_x<=s->width); + assert(src_y >=-16 && src_y<=s->height); + + ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x; + s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); + + dxy = ((motion_by & 1) << 1) | (motion_bx & 1); + src_x = mb_x * 16 + (motion_bx >> 1); + src_y = mb_y * 16 + (motion_by >> 1); + assert(src_x >=-16 && src_x<=s->width); + assert(src_y >=-16 && src_y<=s->height); + + ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x; + s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); + } + + fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.sub_penalty_factor + +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.sub_penalty_factor; + + s->dsp.me_sub_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize); + return fbmin; } @@ -1374,66 +1224,14 @@ static inline int direct_search(MpegEncContext * s, int P[10][2]; const int mot_stride = s->mb_width + 2; const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1; - int dmin, dmin2; - int motion_fx, motion_fy, motion_bx, motion_by, motion_bx0, motion_by0; - int motion_dx, motion_dy; - const int motion_px= s->p_mv_table[mot_xy][0]; - const int motion_py= s->p_mv_table[mot_xy][1]; + const int shift= 1+s->quarter_sample; + int dmin, i; const int time_pp= s->pp_time; const int time_pb= s->pb_time; - const int time_bp= time_pp - time_pb; - int bx, by; - int mx, my, mx2, my2; - uint8_t *ref_picture= s->me_scratchpad - (mb_x - 1 + (mb_y - 1)*s->linesize)*16; + int mx, my, xmin, xmax, ymin, ymax; int16_t (*mv_table)[2]= s->b_direct_mv_table; -/* uint16_t *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; */ // f_code of the prev frame - - /* thanks to iso-mpeg the rounding is 
different for the zero vector, so we need to handle that ... */ - motion_fx= (motion_px*time_pb)/time_pp; - motion_fy= (motion_py*time_pb)/time_pp; - motion_bx0= (-motion_px*time_bp)/time_pp; - motion_by0= (-motion_py*time_bp)/time_pp; - motion_dx= motion_dy=0; - dmin2= check_bidir_mv(s, mb_x, mb_y, - motion_fx, motion_fy, - motion_bx0, motion_by0, - motion_fx, motion_fy, - motion_bx0, motion_by0) - s->qscale; - - motion_bx= motion_fx - motion_px; - motion_by= motion_fy - motion_py; - for(by=-1; by<2; by++){ - for(bx=-1; bx<2; bx++){ - uint8_t *dest_y = s->me_scratchpad + (by+1)*s->linesize*16 + (bx+1)*16; - uint8_t *ptr; - int dxy; - int src_x, src_y; - const int width= s->width; - const int height= s->height; - - dxy = ((motion_fy & 1) << 1) | (motion_fx & 1); - src_x = (mb_x + bx) * 16 + (motion_fx >> 1); - src_y = (mb_y + by) * 16 + (motion_fy >> 1); - src_x = clip(src_x, -16, width); - if (src_x == width) dxy &= ~1; - src_y = clip(src_y, -16, height); - if (src_y == height) dxy &= ~2; - - ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x; - s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); - - dxy = ((motion_by & 1) << 1) | (motion_bx & 1); - src_x = (mb_x + bx) * 16 + (motion_bx >> 1); - src_y = (mb_y + by) * 16 + (motion_by >> 1); - src_x = clip(src_x, -16, width); - if (src_x == width) dxy &= ~1; - src_y = clip(src_y, -16, height); - if (src_y == height) dxy &= ~2; - - s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); - } - } - + uint16_t * const mv_penalty= s->me.mv_penalty[1] + MAX_MV; + P_LAST[0] = mv_table[mot_xy ][0]; P_LAST[1] = mv_table[mot_xy ][1]; P_LEFT[0] = mv_table[mot_xy - 1][0]; @@ -1458,62 +1256,81 @@ static inline int direct_search(MpegEncContext * s, P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); } - dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, -16, -16, 15, 15, ref_picture); - if(mx==0 && my==0) dmin=99999999; // not 
representable, due to rounding stuff - if(dmin2<dmin){ - dmin= dmin2; - mx=0; - my=0; + + ymin= xmin=(-32)>>shift; + ymax= xmax= 31>>shift; + + if(s->co_located_type_table[mb_x + mb_y*s->mb_width]==CO_LOCATED_TYPE_4MV){ + s->mv_type= MV_TYPE_8X8; + }else{ + s->mv_type= MV_TYPE_16X16; } -#if 1 - mx2= mx= mx*2; - my2= my= my*2; - for(by=-1; by<2; by++){ - if(my2+by < -32) continue; - for(bx=-1; bx<2; bx++){ - if(bx==0 && by==0) continue; - if(mx2+bx < -32) continue; - dmin2= check_bidir_mv(s, mb_x, mb_y, - mx2+bx+motion_fx, my2+by+motion_fy, - mx2+bx+motion_bx, my2+by+motion_by, - mx2+bx+motion_fx, my2+by+motion_fy, - motion_bx, motion_by) - s->qscale; - - if(dmin2<dmin){ - dmin=dmin2; - mx= mx2 + bx; - my= my2 + by; - } - } + + for(i=0; i<4; i++){ + int index= s->block_index[i]; + int min, max; + + s->me.co_located_mv[i][0]= s->motion_val[index][0]; + s->me.co_located_mv[i][1]= s->motion_val[index][1]; + s->me.direct_basis_mv[i][0]= s->me.co_located_mv[i][0]*time_pb/time_pp + ((i& 1)<<(shift+3)); + s->me.direct_basis_mv[i][1]= s->me.co_located_mv[i][1]*time_pb/time_pp + ((i>>1)<<(shift+3)); +// s->me.direct_basis_mv[1][i][0]= s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(shift+3); +// s->me.direct_basis_mv[1][i][1]= s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(shift+3); + + max= FFMAX(s->me.direct_basis_mv[i][0], s->me.direct_basis_mv[i][0] - s->me.co_located_mv[i][0])>>shift; + min= FFMIN(s->me.direct_basis_mv[i][0], s->me.direct_basis_mv[i][0] - s->me.co_located_mv[i][0])>>shift; + max+= (2*mb_x + (i& 1))*8 - 1; // +-1 is for the simpler rounding + min+= (2*mb_x + (i& 1))*8 + 1; + if(max >= s->width) xmax= s->width - max - 1; + if(min < -16 ) xmin= - 32 - min; + + max= FFMAX(s->me.direct_basis_mv[i][1], s->me.direct_basis_mv[i][1] - s->me.co_located_mv[i][1])>>shift; + min= FFMIN(s->me.direct_basis_mv[i][1], s->me.direct_basis_mv[i][1] - s->me.co_located_mv[i][1])>>shift; + max+= (2*mb_y + (i>>1))*8 - 1; // +-1 is for the 
simpler rounding + min+= (2*mb_y + (i>>1))*8 + 1; + if(max >= s->height) ymax= s->height - max - 1; + if(min < -16 ) ymin= - 32 - min; + + if(s->mv_type == MV_TYPE_16X16) break; } -#else - mx*=2; my*=2; -#endif - if(mx==0 && my==0){ - motion_bx= motion_bx0; - motion_by= motion_by0; + + assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16); + + if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){ + s->b_direct_mv_table[mot_xy][0]= 0; + s->b_direct_mv_table[mot_xy][1]= 0; + + return 256*256*256*64; + } + + if(s->flags&CODEC_FLAG_QPEL){ + dmin = simple_direct_qpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax, + &s->last_picture, mv_penalty); + dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, + 0, 0, &s->last_picture, 0, 0, mv_penalty); + }else{ + dmin = simple_direct_hpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax, + &s->last_picture, mv_penalty); + dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, + 0, 0, &s->last_picture, 0, 0, mv_penalty); } s->b_direct_mv_table[mot_xy][0]= mx; s->b_direct_mv_table[mot_xy][1]= my; - s->b_direct_forw_mv_table[mot_xy][0]= motion_fx + mx; - s->b_direct_forw_mv_table[mot_xy][1]= motion_fy + my; - s->b_direct_back_mv_table[mot_xy][0]= motion_bx + mx; - s->b_direct_back_mv_table[mot_xy][1]= motion_by + my; return dmin; } void ff_estimate_b_frame_motion(MpegEncContext * s, int mb_x, int mb_y) { - const int quant= s->qscale; + const int penalty_factor= s->me.penalty_factor; int fmin, bmin, dmin, fbmin; int type=0; dmin= direct_search(s, mb_x, mb_y); - fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, s->last_picture.data[0], s->f_code); - bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, s->next_picture.data[0], s->b_code) - quant; + fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code); + bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 
&s->next_picture, s->b_code) - penalty_factor; //printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]); fbmin= bidir_refine(s, mb_x, mb_y); @@ -1541,22 +1358,10 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, if(s->flags&CODEC_FLAG_HQ){ type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT; //FIXME something smarter + if(dmin>256*256*16) type&= ~MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB } -/* -{ -static int count=0; -static int sum=0; -if(type==MB_TYPE_DIRECT){ - int diff= ABS(s->b_forw_mv_table) -} -}*/ - s->mb_type[mb_y*s->mb_width + mb_x]= type; -/* if(mb_y==0 && mb_x==0) printf("\n"); - if(mb_x==0) printf("\n"); - printf("%d", av_log2(type)); -*/ } /* find best f_code for ME which do unlimited searches */ @@ -1569,7 +1374,7 @@ int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type) int best_fcode=-1; int best_score=-10000000; - for(i=0; i<8; i++) score[i]= s->mb_num*(8-i); //FIXME *2 and all other too so its the same but nicer + for(i=0; i<8; i++) score[i]= s->mb_num*(8-i); for(y=0; y<s->mb_height; y++){ int x; diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c new file mode 100644 index 0000000000..45b23ce173 --- /dev/null +++ b/libavcodec/motion_est_template.c @@ -0,0 +1,737 @@ +/* + * Motion estimation + * Copyright (c) 2002 Michael Niedermayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +//lets hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...) +//Note, the last line is there to kill these ugly unused var warnings +#define LOAD_COMMON(x, y)\ + uint32_t * const score_map= s->me.score_map;\ + const int stride= s->linesize;\ + const int uvstride= s->uvlinesize;\ + const int time_pp= s->pp_time;\ + const int time_pb= s->pb_time;\ + uint8_t * const src_y= s->new_picture.data[0] + ((y) * stride) + (x);\ + uint8_t * const src_u= s->new_picture.data[1] + (((y)>>1) * uvstride) + ((x)>>1);\ + uint8_t * const src_v= s->new_picture.data[2] + (((y)>>1) * uvstride) + ((x)>>1);\ + uint8_t * const ref_y= ref_picture->data[0] + ((y) * stride) + (x);\ + uint8_t * const ref_u= ref_picture->data[1] + (((y)>>1) * uvstride) + ((x)>>1);\ + uint8_t * const ref_v= ref_picture->data[2] + (((y)>>1) * uvstride) + ((x)>>1);\ + uint8_t * const ref2_y= s->next_picture.data[0] + ((y) * stride) + (x);\ + op_pixels_func (*hpel_put)[4];\ + op_pixels_func (*hpel_avg)[4]= &s->dsp.avg_pixels_tab[size];\ + op_pixels_func (*chroma_hpel_put)[4];\ + qpel_mc_func (*qpel_put)[16];\ + qpel_mc_func (*qpel_avg)[16]= &s->dsp.avg_qpel_pixels_tab[size];\ + const __attribute__((unused)) int unu= time_pp + time_pb + (int)src_u + (int)src_v + (int)ref_u + (int)ref_v\ + + (int)ref2_y + (int)hpel_avg + (int)qpel_avg;\ + if(s->no_rounding /*FIXME b_type*/){\ + hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];\ + chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];\ + qpel_put= &s->dsp.put_no_rnd_qpel_pixels_tab[size];\ + }else{\ + hpel_put=& s->dsp.put_pixels_tab[size];\ + chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];\ + qpel_put= &s->dsp.put_qpel_pixels_tab[size];\ + } + + +#ifdef CMP_HPEL + +#define CHECK_HALF_MV(dx, dy, x, y)\ +{\ + const int hx= 
2*(x)+(dx);\ + const int hy= 2*(y)+(dy);\ + CMP_HPEL(d, dx, dy, x, y, size);\ + d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\ + COPY3_IF_LT(dmin, d, bx, hx, by, hy)\ +} + +#if 0 +static int RENAME(hpel_motion_search)(MpegEncContext * s, + int *mx_ptr, int *my_ptr, int dmin, + int xmin, int ymin, int xmax, int ymax, + int pred_x, int pred_y, Picture *ref_picture, + int n, int size) +{ + UINT8 *ptr; + + const int xx = 16 * s->mb_x + 8*(n&1); + const int yy = 16 * s->mb_y + 8*(n>>1); + const int mx = *mx_ptr; + const int my = *my_ptr; + + LOAD_COMMON(xx, yy); + + // INIT; + //FIXME factorize + me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub; + + if(s->no_rounding /*FIXME b_type*/){ + hpel_put= &s->dsp.put_no_rnd_pixels_tab[size]; + chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1]; + }else{ + hpel_put=& s->dsp.put_pixels_tab[size]; + chroma_hpel_put= &s->dsp.put_pixels_tab[size+1]; + } + cmp= s->dsp.me_cmp[size]; + chroma_cmp= s->dsp.me_cmp[size+1]; + cmp_sub= s->dsp.me_sub_cmp[size]; + chroma_cmp_sub= s->dsp.me_sub_cmp[size+1]; + + if(s->me.skip){ //FIXME somehow move up (benchmark) + *mx_ptr = 0; + *my_ptr = 0; + return dmin; + } + + if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ + CMP_HPEL(dmin, 0, 0, mx, my, size); + if(mx || my) + dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor; + } + + if (mx > xmin && mx < xmax && + my > ymin && my < ymax) { + int bx=2*mx, by=2*my; + int d= dmin; + + CHECK_HALF_MV(1, 1, mx-1, my-1) + CHECK_HALF_MV(0, 1, mx , my-1) + CHECK_HALF_MV(1, 1, mx , my-1) + CHECK_HALF_MV(1, 0, mx-1, my ) + CHECK_HALF_MV(1, 0, mx , my ) + CHECK_HALF_MV(1, 1, mx-1, my ) + CHECK_HALF_MV(0, 1, mx , my ) + CHECK_HALF_MV(1, 1, mx , my ) + + assert(bx < xmin*2 || bx > xmax*2 || by < ymin*2 || by > ymax*2); + + *mx_ptr = bx; + *my_ptr = by; + }else{ + *mx_ptr =2*mx; + *my_ptr =2*my; + } + + return dmin; +} + +#else +static int RENAME(hpel_motion_search)(MpegEncContext * s, + int *mx_ptr, int 
*my_ptr, int dmin, + int xmin, int ymin, int xmax, int ymax, + int pred_x, int pred_y, Picture *ref_picture, + int n, int size, uint16_t * const mv_penalty) +{ + const int xx = 16 * s->mb_x + 8*(n&1); + const int yy = 16 * s->mb_y + 8*(n>>1); + const int mx = *mx_ptr; + const int my = *my_ptr; + const int penalty_factor= s->me.sub_penalty_factor; + me_cmp_func cmp_sub, chroma_cmp_sub; + + LOAD_COMMON(xx, yy); + + //FIXME factorize + + cmp_sub= s->dsp.me_sub_cmp[size]; + chroma_cmp_sub= s->dsp.me_sub_cmp[size+1]; + + if(s->me.skip){ //FIXME move out of hpel? + *mx_ptr = 0; + *my_ptr = 0; + return dmin; + } + + if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ + CMP_HPEL(dmin, 0, 0, mx, my, size); + if(mx || my) + dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor; + } + + if (mx > xmin && mx < xmax && + my > ymin && my < ymax) { + int bx=2*mx, by=2*my; + int d= dmin; + const int index= (my<<ME_MAP_SHIFT) + mx; + const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] + + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*penalty_factor; + const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)] + + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*penalty_factor; + const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)] + + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*penalty_factor; + const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] + + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*penalty_factor; + + if(t<=b){ + CHECK_HALF_MV(0, 1, mx ,my-1) + if(l<=r){ + CHECK_HALF_MV(1, 1, mx-1, my-1) + if(t+r<=b+l){ + CHECK_HALF_MV(1, 1, mx , my-1) + }else{ + CHECK_HALF_MV(1, 1, mx-1, my ) + } + CHECK_HALF_MV(1, 0, mx-1, my ) + }else{ + CHECK_HALF_MV(1, 1, mx , my-1) + if(t+l<=b+r){ + CHECK_HALF_MV(1, 1, mx-1, my-1) + }else{ + CHECK_HALF_MV(1, 1, mx , my ) + } + CHECK_HALF_MV(1, 0, mx , my ) + } + }else{ + if(l<=r){ + if(t+l<=b+r){ + CHECK_HALF_MV(1, 1, mx-1, my-1) + }else{ + CHECK_HALF_MV(1, 1, mx , my ) + } + 
CHECK_HALF_MV(1, 0, mx-1, my) + CHECK_HALF_MV(1, 1, mx-1, my) + }else{ + if(t+r<=b+l){ + CHECK_HALF_MV(1, 1, mx , my-1) + }else{ + CHECK_HALF_MV(1, 1, mx-1, my) + } + CHECK_HALF_MV(1, 0, mx , my) + CHECK_HALF_MV(1, 1, mx , my) + } + CHECK_HALF_MV(0, 1, mx , my) + } + assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2); + + *mx_ptr = bx; + *my_ptr = by; + }else{ + *mx_ptr =2*mx; + *my_ptr =2*my; + } + + return dmin; +} +#endif + +#endif /* CMP_HPEL */ + +#ifdef CMP_QPEL + +#define CHECK_QUARTER_MV(dx, dy, x, y)\ +{\ + const int hx= 4*(x)+(dx);\ + const int hy= 4*(y)+(dy);\ + CMP_QPEL(d, dx, dy, x, y, size);\ + d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\ + COPY3_IF_LT(dmin, d, bx, hx, by, hy)\ +} + +static int RENAME(qpel_motion_search)(MpegEncContext * s, + int *mx_ptr, int *my_ptr, int dmin, + int xmin, int ymin, int xmax, int ymax, + int pred_x, int pred_y, Picture *ref_picture, + int n, int size, uint16_t * const mv_penalty) +{ + const int xx = 16 * s->mb_x + 8*(n&1); + const int yy = 16 * s->mb_y + 8*(n>>1); + const int mx = *mx_ptr; + const int my = *my_ptr; + const int penalty_factor= s->me.sub_penalty_factor; + const int map_generation= s->me.map_generation; + uint32_t *map= s->me.map; + me_cmp_func cmp, chroma_cmp; + me_cmp_func cmp_sub, chroma_cmp_sub; + + LOAD_COMMON(xx, yy); + + cmp= s->dsp.me_cmp[size]; + chroma_cmp= s->dsp.me_cmp[size+1]; //factorize FIXME + //FIXME factorize + + cmp_sub= s->dsp.me_sub_cmp[size]; + chroma_cmp_sub= s->dsp.me_sub_cmp[size+1]; + + if(s->me.skip){ //FIXME somehow move up (benchmark) + *mx_ptr = 0; + *my_ptr = 0; + return dmin; + } + + if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ + CMP_QPEL(dmin, 0, 0, mx, my, size); + if(mx || my) + dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor; + } + + if (mx > xmin && mx < xmax && + my > ymin && my < ymax) { + int bx=4*mx, by=4*my; + int d= dmin; + int i, nx, ny; + const int index= (my<<ME_MAP_SHIFT) + mx; + 
const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)]; + const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]; + const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]; + const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)]; + const int c= score_map[(index )&(ME_MAP_SIZE-1)]; + int best[8]; + int best_pos[8][2]; + + memset(best, 64, sizeof(int)*8); +#if 1 + if(s->avctx->dia_size>=2){ + const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; + const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; + const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)]; + const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)]; + + for(ny= -3; ny <= 3; ny++){ + for(nx= -3; nx <= 3; nx++){ + const int t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t; + const int c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c; + const int b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b; + int score= ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2; + int i; + + if((nx&3)==0 && (ny&3)==0) continue; + + score += 1024*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor; + +// if(nx&1) score-=1024*s->me.penalty_factor; +// if(ny&1) score-=1024*s->me.penalty_factor; + + for(i=0; i<8; i++){ + if(score < best[i]){ + memmove(&best[i+1], &best[i], sizeof(int)*(7-i)); + memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i)); + best[i]= score; + best_pos[i][0]= nx + 4*mx; + best_pos[i][1]= ny + 4*my; + break; + } + } + } + } + }else{ + int tl; + const int cx = 4*(r - l); + const int cx2= r + l - 2*c; + const int cy = 4*(b - t); + const int cy2= b + t - 2*c; + int cxy; + + if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME + tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; + }else{ + CMP(tl, mx-1, my-1, size); //FIXME wrong if chroma me is different + } + + cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c; + + assert(16*cx2 + 4*cx 
+ 32*c == 32*r); + assert(16*cx2 - 4*cx + 32*c == 32*l); + assert(16*cy2 + 4*cy + 32*c == 32*b); + assert(16*cy2 - 4*cy + 32*c == 32*t); + assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl); + + for(ny= -3; ny <= 3; ny++){ + for(nx= -3; nx <= 3; nx++){ + int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor + int i; + + if((nx&3)==0 && (ny&3)==0) continue; + + score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor; +// if(nx&1) score-=32*s->me.penalty_factor; + // if(ny&1) score-=32*s->me.penalty_factor; + + for(i=0; i<8; i++){ + if(score < best[i]){ + memmove(&best[i+1], &best[i], sizeof(int)*(7-i)); + memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i)); + best[i]= score; + best_pos[i][0]= nx + 4*mx; + best_pos[i][1]= ny + 4*my; + break; + } + } + } + } + } + for(i=0; i<8; i++){ + nx= best_pos[i][0]; + ny= best_pos[i][1]; + CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2) + } +#if 0 + nx= FFMAX(4*mx - bx, bx - 4*mx); + ny= FFMAX(4*my - by, by - 4*my); + + static int stats[4][4]; + stats[nx][ny]++; + if(256*256*256*64 % (stats[0][0]+1) ==0){ + for(i=0; i<16; i++){ + if((i&3)==0) printf("\n"); + printf("%6d ", stats[0][i]); + } + printf("\n"); + } +#endif +#else + + CHECK_QUARTER_MV(2, 2, mx-1, my-1) + CHECK_QUARTER_MV(0, 2, mx , my-1) + CHECK_QUARTER_MV(2, 2, mx , my-1) + CHECK_QUARTER_MV(2, 0, mx , my ) + CHECK_QUARTER_MV(2, 2, mx , my ) + CHECK_QUARTER_MV(0, 2, mx , my ) + CHECK_QUARTER_MV(2, 2, mx-1, my ) + CHECK_QUARTER_MV(2, 0, mx-1, my ) + + nx= bx; + ny= by; + + for(i=0; i<8; i++){ + int ox[8]= {0, 1, 1, 1, 0,-1,-1,-1}; + int oy[8]= {1, 1, 0,-1,-1,-1, 0, 1}; + CHECK_QUARTER_MV((nx + ox[i])&3, (ny + oy[i])&3, (nx + ox[i])>>2, (ny + oy[i])>>2) + } +#endif +#if 0 + //outer ring + CHECK_QUARTER_MV(1, 3, mx-1, my-1) + CHECK_QUARTER_MV(1, 2, mx-1, my-1) + CHECK_QUARTER_MV(1, 1, mx-1, my-1) + CHECK_QUARTER_MV(2, 1, mx-1, my-1) + CHECK_QUARTER_MV(3, 1, mx-1, my-1) + 
CHECK_QUARTER_MV(0, 1, mx , my-1) + CHECK_QUARTER_MV(1, 1, mx , my-1) + CHECK_QUARTER_MV(2, 1, mx , my-1) + CHECK_QUARTER_MV(3, 1, mx , my-1) + CHECK_QUARTER_MV(3, 2, mx , my-1) + CHECK_QUARTER_MV(3, 3, mx , my-1) + CHECK_QUARTER_MV(3, 0, mx , my ) + CHECK_QUARTER_MV(3, 1, mx , my ) + CHECK_QUARTER_MV(3, 2, mx , my ) + CHECK_QUARTER_MV(3, 3, mx , my ) + CHECK_QUARTER_MV(2, 3, mx , my ) + CHECK_QUARTER_MV(1, 3, mx , my ) + CHECK_QUARTER_MV(0, 3, mx , my ) + CHECK_QUARTER_MV(3, 3, mx-1, my ) + CHECK_QUARTER_MV(2, 3, mx-1, my ) + CHECK_QUARTER_MV(1, 3, mx-1, my ) + CHECK_QUARTER_MV(1, 2, mx-1, my ) + CHECK_QUARTER_MV(1, 1, mx-1, my ) + CHECK_QUARTER_MV(1, 0, mx-1, my ) +#endif + assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4); + + *mx_ptr = bx; + *my_ptr = by; + }else{ + *mx_ptr =4*mx; + *my_ptr =4*my; + } + + return dmin; +} + +#endif /* CMP_QPEL */ + +#define CHECK_MV(x,y)\ +{\ + const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\ + const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ + if(map[index]!=key){\ + CMP(d, x, y, size);\ + map[index]= key;\ + score_map[index]= d;\ + d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\ + COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\ + }\ +} + +#define CHECK_MV_DIR(x,y,new_dir)\ +{\ + const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\ + const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ + if(map[index]!=key){\ + CMP(d, x, y, size);\ + map[index]= key;\ + score_map[index]= d;\ + d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\ + if(d<dmin){\ + best[0]=x;\ + best[1]=y;\ + dmin=d;\ + next_dir= new_dir;\ + }\ + }\ +} + +#define check(x,y,S,v)\ +if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\ +if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\ +if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), 
(y), s->mb_x, s->mb_y);\ +if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\ + + +static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, int dmin, + Picture *ref_picture, + int const pred_x, int const pred_y, int const penalty_factor, + int const xmin, int const ymin, int const xmax, int const ymax, int const shift, + uint32_t *map, int map_generation, int size, uint16_t * const mv_penalty + ) +{ + me_cmp_func cmp, chroma_cmp; + int next_dir=-1; + LOAD_COMMON(s->mb_x*16, s->mb_y*16); + + cmp= s->dsp.me_cmp[size]; + chroma_cmp= s->dsp.me_cmp[size+1]; + + for(;;){ + int d; + const int dir= next_dir; + const int x= best[0]; + const int y= best[1]; + next_dir=-1; + +//printf("%d", dir); + if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0) + if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1) + if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2) + if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3) + + if(next_dir==-1){ + return dmin; + } + } +} + +static inline int RENAME(var_diamond_search)(MpegEncContext * s, int *best, int dmin, + Picture *ref_picture, + int const pred_x, int const pred_y, int const penalty_factor, + int const xmin, int const ymin, int const xmax, int const ymax, int const shift, + uint32_t *map, int map_generation, int size, uint16_t * const mv_penalty + ) +{ + me_cmp_func cmp, chroma_cmp; + int dia_size=1; + LOAD_COMMON(s->mb_x*16, s->mb_y*16); + + cmp= s->dsp.me_cmp[size]; + chroma_cmp= s->dsp.me_cmp[size+1]; + + for(dia_size=1; dia_size<=s->avctx->dia_size; dia_size++){ + int dir, start, end; + const int x= best[0]; + const int y= best[1]; + + start= FFMAX(0, y + dia_size - ymax); + end = FFMIN(dia_size, xmax - x); + for(dir= start; dir<end; dir++){ + int d; + +//check(x + dir,y + dia_size - dir,0, a0) + CHECK_MV(x + dir , y + dia_size - dir); + } + + start= FFMAX(0, x + dia_size - xmax); + end = FFMIN(dia_size, y - ymin); + for(dir= start; dir<end; dir++){ + int d; + +//check(x + dia_size - dir, y - 
dir,0, a1) + CHECK_MV(x + dia_size - dir, y - dir ); + } + + start= FFMAX(0, -y + dia_size + ymin ); + end = FFMIN(dia_size, x - xmin); + for(dir= start; dir<end; dir++){ + int d; + +//check(x - dir,y - dia_size + dir,0, a2) + CHECK_MV(x - dir , y - dia_size + dir); + } + + start= FFMAX(0, -x + dia_size + xmin ); + end = FFMIN(dia_size, ymax - y); + for(dir= start; dir<end; dir++){ + int d; + +//check(x - dia_size + dir, y + dir,0, a3) + CHECK_MV(x - dia_size + dir, y + dir ); + } + + if(x!=best[0] || y!=best[1]) + dia_size=0; + } + return dmin; +} + +static int RENAME(epzs_motion_search)(MpegEncContext * s, int block, + int *mx_ptr, int *my_ptr, + int P[10][2], int pred_x, int pred_y, + int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, uint16_t * const mv_penalty) +{ + int best[2]={0, 0}; + int d, dmin; + const int shift= 1+s->quarter_sample; + uint32_t *map= s->me.map; + int map_generation; + const int penalty_factor= s->me.penalty_factor; + const int size=0; + me_cmp_func cmp, chroma_cmp; + LOAD_COMMON(s->mb_x*16, s->mb_y*16); + + cmp= s->dsp.me_cmp[size]; + chroma_cmp= s->dsp.me_cmp[size+1]; + + map_generation= update_map_generation(s); + + CMP(dmin, 0, 0, size); + map[0]= map_generation; + score_map[0]= dmin; + + /* first line */ + if ((s->mb_y == 0 || s->first_slice_line)) { + CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) + CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift) + }else{ + if(dmin<256 && ( P_LEFT[0] |P_LEFT[1] + |P_TOP[0] |P_TOP[1] + |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0 && s->avctx->dia_size==0){ + *mx_ptr= 0; + *my_ptr= 0; + s->me.skip=1; + return dmin; + } + CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift) + if(dmin>256*2){ + CHECK_MV(P_LAST[0] >>shift, P_LAST[1] >>shift) + CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift) + CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift) + CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) + } + } + if(dmin>256*4){ + CHECK_MV(P_LAST_RIGHT[0] >>shift, P_LAST_RIGHT[1] >>shift) + CHECK_MV(P_LAST_BOTTOM[0]>>shift, 
P_LAST_BOTTOM[1]>>shift) + } +#if 0 //doest only slow things down + if(dmin>512*3){ + int step; + dmin= score_map[0]; + best[0]= best[1]=0; + for(step=128; step>0; step>>=1){ + const int step2= step; + int y; + for(y=-step2+best[1]; y<=step2+best[1]; y+=step){ + int x; + if(y<ymin || y>ymax) continue; + + for(x=-step2+best[0]; x<=step2+best[0]; x+=step){ + if(x<xmin || x>xmax) continue; + if(x==best[0] && y==best[1]) continue; + CHECK_MV(x,y) + } + } + } + } +#endif +//check(best[0],best[1],0, b0) + if(s->avctx->dia_size<2) + dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture, + pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, + shift, map, map_generation, size, mv_penalty); + else + dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture, + pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, + shift, map, map_generation, size, mv_penalty); + +//check(best[0],best[1],0, b1) + *mx_ptr= best[0]; + *my_ptr= best[1]; + +// printf("%d %d %d \n", best[0], best[1], dmin); + return dmin; +} + +#ifndef CMP_DIRECT /* no 4mv search needed in direct mode */ +static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block, + int *mx_ptr, int *my_ptr, + int P[10][2], int pred_x, int pred_y, + int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, uint16_t * const mv_penalty) +{ + int best[2]={0, 0}; + int d, dmin; + const int shift= 1+s->quarter_sample; + uint32_t *map= s->me.map; + int map_generation; + const int penalty_factor= s->me.penalty_factor; + const int size=1; + me_cmp_func cmp, chroma_cmp; + LOAD_COMMON((s->mb_x*2 + (block&1))*8, (s->mb_y*2 + (block>>1))*8); + + cmp= s->dsp.me_cmp[size]; + chroma_cmp= s->dsp.me_cmp[size+1]; + + map_generation= update_map_generation(s); + + dmin = 1000000; +//printf("%d %d %d %d //",xmin, ymin, xmax, ymax); + /* first line */ + if ((s->mb_y == 0 || s->first_slice_line) && block<2) { + CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) + CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift) + 
CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) + }else{ + CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) + //FIXME try some early stop + if(dmin>64*2){ + CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift) + CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) + CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift) + CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) + CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift) + } + } + if(dmin>64*4){ + CHECK_MV(P_LAST_RIGHT[0]>>shift, P_LAST_RIGHT[1]>>shift) + CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift) + } + + if(s->avctx->dia_size<2) + dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture, + pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, + shift, map, map_generation, size, mv_penalty); + else + dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture, + pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, + shift, map, map_generation, size, mv_penalty); + *mx_ptr= best[0]; + *my_ptr= best[1]; + +// printf("%d %d %d \n", best[0], best[1], dmin); + return dmin; +} +#endif /* !CMP_DIRECT */ diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c index 17a9aec20a..5dc3e6218e 100644 --- a/libavcodec/mpeg12.c +++ b/libavcodec/mpeg12.c @@ -526,7 +526,7 @@ void ff_mpeg1_encode_init(MpegEncContext *s) } } } - s->mv_penalty= mv_penalty; + s->me.mv_penalty= mv_penalty; s->fcode_tab= fcode_tab; s->min_qcoeff=-255; s->max_qcoeff= 255; diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c index a7808e107b..7c5cf59c87 100644 --- a/libavcodec/mpegvideo.c +++ b/libavcodec/mpegvideo.c @@ -43,8 +43,6 @@ static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w); static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow); void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c; -static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h, - int src_x, int src_y, int w, int h); /* enable all paranoid tests for 
rounding, overflows, etc... */ @@ -64,8 +62,8 @@ static const uint16_t aanscales[64] = { 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315, 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520, 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552, - 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446, - 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247 + 8867 , 12299, 11585, 10426, 8867, 6967, 4799, 2446, + 4520 , 6270, 5906, 5315, 4520, 3552, 2446, 1247 }; /* Input permutation for the simple_idct_mmx */ @@ -87,9 +85,6 @@ static const uint8_t h263_chroma_roundtab[16] = { static UINT16 (*default_mv_penalty)[MAX_MV*2+1]=NULL; static UINT8 default_fcode_tab[MAX_MV*2+1]; -/* default motion estimation */ -int motion_estimation_method = ME_EPZS; - static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64], const UINT16 *quant_matrix, int bias, int qmin, int qmax) { @@ -394,15 +389,13 @@ int MPV_common_init(MpegEncContext *s) CHECKED_ALLOCZ(s->b_back_mv_table , mv_table_size * 2 * sizeof(INT16)) CHECKED_ALLOCZ(s->b_bidir_forw_mv_table , mv_table_size * 2 * sizeof(INT16)) CHECKED_ALLOCZ(s->b_bidir_back_mv_table , mv_table_size * 2 * sizeof(INT16)) - CHECKED_ALLOCZ(s->b_direct_forw_mv_table, mv_table_size * 2 * sizeof(INT16)) - CHECKED_ALLOCZ(s->b_direct_back_mv_table, mv_table_size * 2 * sizeof(INT16)) CHECKED_ALLOCZ(s->b_direct_mv_table , mv_table_size * 2 * sizeof(INT16)) //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer() - CHECKED_ALLOCZ(s->me_scratchpad, s->width*2*16*3*sizeof(uint8_t)) + CHECKED_ALLOCZ(s->me.scratchpad, s->width*2*16*3*sizeof(uint8_t)) - CHECKED_ALLOCZ(s->me_map , ME_MAP_SIZE*sizeof(uint32_t)) - CHECKED_ALLOCZ(s->me_score_map, ME_MAP_SIZE*sizeof(uint16_t)) + CHECKED_ALLOCZ(s->me.map , ME_MAP_SIZE*sizeof(uint32_t)) + CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t)) if(s->codec_id==CODEC_ID_MPEG4){ CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE); 
@@ -498,8 +491,6 @@ void MPV_common_end(MpegEncContext *s) av_freep(&s->b_back_mv_table); av_freep(&s->b_bidir_forw_mv_table); av_freep(&s->b_bidir_back_mv_table); - av_freep(&s->b_direct_forw_mv_table); - av_freep(&s->b_direct_back_mv_table); av_freep(&s->b_direct_mv_table); av_freep(&s->motion_val); av_freep(&s->dc_val[0]); @@ -508,9 +499,9 @@ void MPV_common_end(MpegEncContext *s) av_freep(&s->mbintra_table); av_freep(&s->cbp_table); av_freep(&s->pred_dir_table); - av_freep(&s->me_scratchpad); - av_freep(&s->me_map); - av_freep(&s->me_score_map); + av_freep(&s->me.scratchpad); + av_freep(&s->me.map); + av_freep(&s->me.score_map); av_freep(&s->mbskip_table); av_freep(&s->bitstream_buffer); @@ -566,6 +557,7 @@ int MPV_encode_init(AVCodecContext *avctx) s->chroma_elim_threshold= avctx->chroma_elim_threshold; s->strict_std_compliance= avctx->strict_std_compliance; s->data_partitioning= avctx->flags & CODEC_FLAG_PART; + s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0; s->mpeg_quant= avctx->mpeg_quant; if (s->gop_size <= 1) { @@ -575,12 +567,7 @@ int MPV_encode_init(AVCodecContext *avctx) s->intra_only = 0; } - /* ME algorithm */ - if (avctx->me_method == 0) - /* For compatibility */ - s->me_method = motion_estimation_method; - else - s->me_method = avctx->me_method; + s->me_method = avctx->me_method; /* Fixed QSCALE */ s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE); @@ -713,13 +700,14 @@ int MPV_encode_init(AVCodecContext *avctx) } } } - s->mv_penalty= default_mv_penalty; + s->me.mv_penalty= default_mv_penalty; s->fcode_tab= default_fcode_tab; s->y_dc_scale_table= s->c_dc_scale_table= ff_mpeg1_dc_scale_table; /* dont use mv_penalty table for crap MV as it would be confused */ - if (s->me_method < ME_EPZS) s->mv_penalty = default_mv_penalty; + //FIXME remove after fixing / removing old ME + if (s->me_method < ME_EPZS) s->me.mv_penalty = default_mv_penalty; s->encoding = 1; @@ -727,6 +715,8 @@ int MPV_encode_init(AVCodecContext *avctx) if 
(MPV_common_init(s) < 0) return -1; + ff_init_me(s); + #ifdef CONFIG_ENCODERS if (s->out_format == FMT_H263) h263_encode_init(s); @@ -947,6 +937,18 @@ void MPV_frame_end(MpegEncContext *s) if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/) s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]); } + if(s->avctx->debug&FF_DEBUG_SKIP){ + int x,y; + for(y=0; y<s->mb_height; y++){ + for(x=0; x<s->mb_width; x++){ + int count= s->mbskip_table[x + y*s->mb_width]; + if(count>9) count=9; + printf(" %1d", count); + } + printf("\n"); + } + printf("pict type: %d\n", s->pict_type); + } } static int get_sae(uint8_t *src, int ref, int stride){ @@ -1284,7 +1286,7 @@ static inline void gmc1_motion(MpegEncContext *s, if(s->flags&CODEC_FLAG_EMU_EDGE){ if(src_x<0 || src_y<0 || src_x + (motion_x&15) + 16 > s->h_edge_pos || src_y + (motion_y&15) + 16 > s->v_edge_pos){ - emulated_edge_mc(s, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos); + ff_emulated_edge_mc(s, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos); ptr= s->edge_emu_buffer; emu=1; } @@ -1322,14 +1324,14 @@ static inline void gmc1_motion(MpegEncContext *s, offset = (src_y * uvlinesize) + src_x + (src_offset>>1); ptr = ref_picture[1] + offset; if(emu){ - emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); + ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); ptr= s->edge_emu_buffer; } s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding); ptr = ref_picture[2] + offset; if(emu){ - emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); + ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); ptr= s->edge_emu_buffer; } s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - 
s->no_rounding); @@ -1401,7 +1403,7 @@ static inline void gmc_motion(MpegEncContext *s, } -static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h, +void ff_emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h, int src_x, int src_y, int w, int h){ int x, y; int start_y, start_x, end_y, end_x; @@ -1501,7 +1503,7 @@ if(s->quarter_sample) if(s->flags&CODEC_FLAG_EMU_EDGE){ if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos || src_y + (motion_y&1) + h > v_edge_pos){ - emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, + ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos); ptr= s->edge_emu_buffer + src_offset; emu=1; @@ -1538,7 +1540,7 @@ if(s->quarter_sample) offset = (src_y * uvlinesize) + src_x + (src_offset >> 1); ptr = ref_picture[1] + offset; if(emu){ - emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, + ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1); ptr= s->edge_emu_buffer + (src_offset >> 1); } @@ -1546,7 +1548,7 @@ if(s->quarter_sample) ptr = ref_picture[2] + offset; if(emu){ - emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, + ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based, src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1); ptr= s->edge_emu_buffer + (src_offset >> 1); } @@ -1586,7 +1588,7 @@ static inline void qpel_motion(MpegEncContext *s, if(s->flags&CODEC_FLAG_EMU_EDGE){ if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 16 > s->h_edge_pos || src_y + (motion_y&3) + h > v_edge_pos){ - emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, + ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based, src_x, src_y<<field_based, s->h_edge_pos, 
s->v_edge_pos); ptr= s->edge_emu_buffer + src_offset; emu=1; @@ -1631,7 +1633,7 @@ static inline void qpel_motion(MpegEncContext *s, offset = (src_y * uvlinesize) + src_x + (src_offset >> 1); ptr = ref_picture[1] + offset; if(emu){ - emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, + ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1); ptr= s->edge_emu_buffer + (src_offset >> 1); } @@ -1639,7 +1641,7 @@ static inline void qpel_motion(MpegEncContext *s, ptr = ref_picture[2] + offset; if(emu){ - emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, + ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based, src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1); ptr= s->edge_emu_buffer + (src_offset >> 1); } @@ -1675,6 +1677,10 @@ static inline void MPV_motion(MpegEncContext *s, ref_picture, 0, 0, pix_op, qpix_op, s->mv[dir][0][0], s->mv[dir][0][1], 16); + }else if(s->mspel){ + ff_mspel_motion(s, dest_y, dest_cb, dest_cr, + ref_picture, pix_op, + s->mv[dir][0][0], s->mv[dir][0][1], 16); }else{ mpeg_motion(s, dest_y, dest_cb, dest_cr, 0, ref_picture, 0, @@ -1706,7 +1712,7 @@ static inline void MPV_motion(MpegEncContext *s, if(s->flags&CODEC_FLAG_EMU_EDGE){ if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos || src_y + (motion_y&3) + 8 > s->v_edge_pos){ - emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos); + ff_emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos); ptr= s->edge_emu_buffer; } } @@ -1737,7 +1743,7 @@ static inline void MPV_motion(MpegEncContext *s, if(s->flags&CODEC_FLAG_EMU_EDGE){ if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos || src_y + (motion_y&1) + 8 > s->v_edge_pos){ - emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos); + 
ff_emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos); ptr= s->edge_emu_buffer; } } @@ -1784,7 +1790,7 @@ static inline void MPV_motion(MpegEncContext *s, if(s->flags&CODEC_FLAG_EMU_EDGE){ if(src_x<0 || src_y<0 || src_x + (dxy &1) + 8 > s->h_edge_pos>>1 || src_y + (dxy>>1) + 8 > s->v_edge_pos>>1){ - emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); + ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); ptr= s->edge_emu_buffer; emu=1; } @@ -1793,7 +1799,7 @@ static inline void MPV_motion(MpegEncContext *s, ptr = ref_picture[2] + offset; if(emu){ - emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); + ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); ptr= s->edge_emu_buffer; } pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8); @@ -1928,7 +1934,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) /* update motion predictor, not for B-frames as they need the motion_val from the last P/S-Frame */ if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE) { //FIXME move into h263.c if possible, format specific stuff shouldnt be here - + //FIXME a lot of thet is only needed for !low_delay const int wrap = s->block_wrap[0]; const int xy = s->block_index[0]; const int mb_index= s->mb_x + s->mb_y*s->mb_width; @@ -2064,7 +2070,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) add_dequant_dct(s, block[4], 4, dest_cb, s->uvlinesize); add_dequant_dct(s, block[5], 5, dest_cr, s->uvlinesize); } - } else { + } else if(s->codec_id != CODEC_ID_WMV2){ add_dct(s, block[0], 0, dest_y, dct_linesize); add_dct(s, block[1], 1, dest_y + 8, dct_linesize); add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize); @@ -2074,6 +2080,8 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) add_dct(s, block[4], 4, dest_cb, s->uvlinesize); 
add_dct(s, block[5], 5, dest_cr, s->uvlinesize); } + } else{ + ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr); } } else { /* dct only in intra block */ @@ -2376,7 +2384,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16; if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){ - emulated_edge_mc(s, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height); + ff_emulated_edge_mc(s, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height); ptr= s->edge_emu_buffer; emu=1; } @@ -2408,14 +2416,14 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) int wrap_c = s->uvlinesize; ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8; if(emu){ - emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); + ff_emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); ptr= s->edge_emu_buffer; } s->dsp.get_pixels(s->block[4], ptr, wrap_c); ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8; if(emu){ - emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); + ff_emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); ptr= s->edge_emu_buffer; } s->dsp.get_pixels(s->block[5], ptr, wrap_c); @@ -2455,7 +2463,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) } if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){ - emulated_edge_mc(s, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height); + ff_emulated_edge_mc(s, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height); ptr_y= s->edge_emu_buffer; emu=1; } @@ -2487,12 +2495,12 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) skip_dct[5]= 1; }else{ if(emu){ - emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); + ff_emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); ptr_cb= 
s->edge_emu_buffer; } s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c); if(emu){ - emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); + ff_emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1); ptr_cr= s->edge_emu_buffer; } s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c); @@ -2574,21 +2582,25 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y) #ifdef CONFIG_ENCODERS /* huffman encode */ - switch(s->out_format) { - case FMT_MPEG1: - mpeg1_encode_mb(s, s->block, motion_x, motion_y); - break; - case FMT_H263: - if (s->h263_msmpeg4) - msmpeg4_encode_mb(s, s->block, motion_x, motion_y); - else if(s->h263_pred) - mpeg4_encode_mb(s, s->block, motion_x, motion_y); - else - h263_encode_mb(s, s->block, motion_x, motion_y); - break; - case FMT_MJPEG: - mjpeg_encode_mb(s, s->block); - break; + switch(s->codec_id){ //FIXME funct ptr could be slightly faster + case CODEC_ID_MPEG1VIDEO: + mpeg1_encode_mb(s, s->block, motion_x, motion_y); break; + case CODEC_ID_MPEG4: + mpeg4_encode_mb(s, s->block, motion_x, motion_y); break; + case CODEC_ID_MSMPEG4V2: + case CODEC_ID_MSMPEG4V3: + case CODEC_ID_WMV1: + msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break; + case CODEC_ID_WMV2: + ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break; + case CODEC_ID_MJPEG: + mjpeg_encode_mb(s, s->block); break; + case CODEC_ID_H263: + case CODEC_ID_H263P: + case CODEC_ID_RV10: + h263_encode_mb(s, s->block, motion_x, motion_y); break; + default: + assert(0); } #endif } @@ -2704,13 +2716,18 @@ static inline int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, in int x,y; if(w==16 && h==16) - return s->dsp.pix_norm(src1, src2, stride); + return s->dsp.sse[0](NULL, src1, src2, stride); + else if(w==8 && h==8) + return s->dsp.sse[1](NULL, src1, src2, stride); for(y=0; y<h; y++){ for(x=0; x<w; x++){ acc+= sq[src1[x + y*stride] - src2[x + y*stride]]; } } + + assert(acc>=0); + return 
acc; } @@ -2751,6 +2768,18 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->scene_change_score=0; s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME ratedistoration + + if(s->msmpeg4_version){ + if(s->pict_type==I_TYPE) + s->no_rounding=1; + else if(s->flipflop_rounding) + s->no_rounding ^= 1; + }else{ + if(s->pict_type==I_TYPE) + s->no_rounding=0; + else if(s->pict_type!=B_TYPE) + s->no_rounding ^= 1; + } /* Estimate motion for every MB */ if(s->pict_type != I_TYPE){ @@ -2772,7 +2801,6 @@ static void encode_picture(MpegEncContext *s, int picture_number) ff_estimate_b_frame_motion(s, mb_x, mb_y); else ff_estimate_p_frame_motion(s, mb_x, mb_y); -// s->mb_type[mb_y*s->mb_width + mb_x]=MB_TYPE_INTER; } } }else /* if(s->pict_type == I_TYPE) */{ @@ -2867,7 +2895,9 @@ static void encode_picture(MpegEncContext *s, int picture_number) mjpeg_picture_header(s); break; case FMT_H263: - if (s->h263_msmpeg4) + if (s->codec_id == CODEC_ID_WMV2) + ff_wmv2_encode_picture_header(s, picture_number); + else if (s->h263_msmpeg4) msmpeg4_encode_picture_header(s, picture_number); else if (s->h263_pred) mpeg4_encode_picture_header(s, picture_number); @@ -3049,15 +3079,14 @@ static void encode_picture(MpegEncContext *s, int picture_number) &dmin, &next_block, 0, 0); } if(mb_type&MB_TYPE_DIRECT){ + int mx= s->b_direct_mv_table[xy][0]; + int my= s->b_direct_mv_table[xy][1]; + s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT; - s->mv_type = MV_TYPE_16X16; //FIXME s->mb_intra= 0; - s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0]; - s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1]; - s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0]; - s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1]; + ff_mpeg4_set_direct_mv(s, mx, my); encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb, - &dmin, &next_block, s->b_direct_mv_table[xy][0], s->b_direct_mv_table[xy][1]); + &dmin, &next_block, mx, my); } if(mb_type&MB_TYPE_INTRA){ 
s->mv_dir = MV_DIR_FORWARD; @@ -3122,10 +3151,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) s->mb_intra= 0; motion_x=s->b_direct_mv_table[xy][0]; motion_y=s->b_direct_mv_table[xy][1]; - s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0]; - s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1]; - s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0]; - s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1]; + ff_mpeg4_set_direct_mv(s, motion_x, motion_y); break; case MB_TYPE_BIDIR: s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD; @@ -3170,7 +3196,7 @@ static void encode_picture(MpegEncContext *s, int picture_number) if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16; if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16; - + s->current_picture.error[0] += sse( s, s->new_picture .data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, @@ -3471,6 +3497,7 @@ char ff_get_pict_type_char(int pict_type){ case P_TYPE: return 'P'; case B_TYPE: return 'B'; case S_TYPE: return 'S'; + default: return '?'; } } @@ -3574,12 +3601,3 @@ AVCodec wmv1_encoder = { MPV_encode_end, }; -AVCodec wmv2_encoder = { - "wmv2", - CODEC_TYPE_VIDEO, - CODEC_ID_WMV2, - sizeof(MpegEncContext), - MPV_encode_init, - MPV_encode_picture, - MPV_encode_end, -}; diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h index ca0054418f..84e28f9d76 100644 --- a/libavcodec/mpegvideo.h +++ b/libavcodec/mpegvideo.h @@ -129,6 +129,31 @@ typedef struct ParseContext{ int frame_start_found; } ParseContext; +struct MpegEncContext; + +typedef struct MotionEstContext{ + int skip; /* set if ME is skiped for the current MB */ + int co_located_mv[4][2]; /* mv from last p frame for direct mode ME */ + int direct_basis_mv[4][2]; + uint8_t *scratchpad; /* data area for the me algo, so that the ME doesnt need to malloc/free */ + uint32_t *map; /* map to avoid duplicate evaluations */ + uint32_t *score_map; /* map to store the scores */ + int map_generation; + int penalty_factor; + int sub_penalty_factor; + UINT16 
(*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV */ + int (*sub_motion_search)(struct MpegEncContext * s, + int *mx_ptr, int *my_ptr, int dmin, + int xmin, int ymin, int xmax, int ymax, + int pred_x, int pred_y, Picture *ref_picture, + int n, int size, uint16_t * const mv_penalty); + int (*motion_search[7])(struct MpegEncContext * s, int block, + int *mx_ptr, int *my_ptr, + int P[10][2], int pred_x, int pred_y, + int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, + uint16_t * const mv_penalty); +}MotionEstContext; + typedef struct MpegEncContext { struct AVCodecContext *avctx; /* the following parameters must be initialized before encoding */ @@ -222,15 +247,8 @@ typedef struct MpegEncContext { INT16 (*b_back_mv_table)[2]; /* MV table (1MV per MB) backward mode b-frame encoding */ INT16 (*b_bidir_forw_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */ INT16 (*b_bidir_back_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */ - INT16 (*b_direct_forw_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */ - INT16 (*b_direct_back_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */ INT16 (*b_direct_mv_table)[2]; /* MV table (1MV per MB) direct mode b-frame encoding */ int me_method; /* ME algorithm */ - uint8_t *me_scratchpad; /* data area for the me algo, so that the ME doesnt need to malloc/free */ - uint32_t *me_map; /* map to avoid duplicate evaluations */ - uint16_t *me_score_map; /* map to store the SADs */ - int me_map_generation; - int skip_me; /* set if ME is skiped for the current MB */ int scene_change_score; int mv_dir; #define MV_DIR_BACKWARD 1 @@ -250,8 +268,9 @@ typedef struct MpegEncContext { int mv[2][4][2]; int field_select[2][2]; int last_mv[2][2][2]; /* last MV, used for MV prediction in MPEG1 & B-frame MPEG4 */ - UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV, used for ME */ UINT8 *fcode_tab; /* smallest fcode needed for each 
MV */ + + MotionEstContext me; int no_rounding; /* apply no rounding to motion compensation (MPEG4, msmpeg4, ...) for b-frames rounding mode is allways 0 */ @@ -458,6 +477,7 @@ typedef struct MpegEncContext { /* [mb_intra][isChroma][level][run][last] */ int (*ac_stats)[2][MAX_LEVEL+1][MAX_RUN+1][2]; int inter_intra_pred; + int mspel; /* decompression specific */ GetBitContext gb; @@ -519,6 +539,7 @@ typedef struct MpegEncContext { void (*fdct)(DCTELEM *block/* align 16*/); void (*idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); void (*idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); + //FIXME move above funcs into dspContext perhaps } MpegEncContext; @@ -528,6 +549,9 @@ void MPV_common_end(MpegEncContext *s); void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx); void MPV_frame_end(MpegEncContext *s); +int MPV_encode_init(AVCodecContext *avctx); +int MPV_encode_end(AVCodecContext *avctx); +int MPV_encode_picture(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data); #ifdef HAVE_MMX void MPV_common_init_mmx(MpegEncContext *s); #endif @@ -553,6 +577,8 @@ void ff_clean_intra_table_entries(MpegEncContext *s); void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable); void ff_error_resilience(MpegEncContext *s); void ff_draw_horiz_band(MpegEncContext *s); +void ff_emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h, + int src_x, int src_y, int w, int h); char ff_get_pict_type_char(int pict_type); @@ -585,6 +611,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type); void ff_fix_long_p_mvs(MpegEncContext * s); void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type); +void ff_init_me(MpegEncContext *s); /* mpeg12.c */ @@ -631,6 +658,11 @@ extern UINT8 
ff_mpeg4_y_dc_scale_table[32]; extern UINT8 ff_mpeg4_c_dc_scale_table[32]; extern const INT16 ff_mpeg4_default_intra_matrix[64]; extern const INT16 ff_mpeg4_default_non_intra_matrix[64]; +int ff_h263_decode_init(AVCodecContext *avctx); +int ff_h263_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + UINT8 *buf, int buf_size); +int ff_h263_decode_end(AVCodecContext *avctx); void h263_encode_mb(MpegEncContext *s, DCTELEM block[6][64], int motion_x, int motion_y); @@ -667,6 +699,7 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s); int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s); int ff_h263_resync(MpegEncContext *s); int ff_h263_get_gob_height(MpegEncContext *s); +void ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my); /* rv10.c */ @@ -684,7 +717,16 @@ int msmpeg4_decode_picture_header(MpegEncContext * s); int msmpeg4_decode_ext_header(MpegEncContext * s, int buf_size); int ff_msmpeg4_decode_init(MpegEncContext *s); void ff_msmpeg4_encode_init(MpegEncContext *s); - +int ff_wmv2_decode_picture_header(MpegEncContext * s); +void ff_wmv2_add_mb(MpegEncContext *s, DCTELEM block[6][64], uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr); +void ff_mspel_motion(MpegEncContext *s, + UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr, + UINT8 **ref_picture, op_pixels_func (*pix_op)[4], + int motion_x, int motion_y, int h); +int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number); +void ff_wmv2_encode_mb(MpegEncContext * s, + DCTELEM block[6][64], + int motion_x, int motion_y); /* mjpegenc.c */ int mjpeg_init(MpegEncContext *s); diff --git a/libavcodec/msmpeg4.c b/libavcodec/msmpeg4.c index 76fa3c2d49..a08418874c 100644 --- a/libavcodec/msmpeg4.c +++ b/libavcodec/msmpeg4.c @@ -48,12 +48,14 @@ #define II_BITRATE 128*1024 #define MBAC_BITRATE 50*1024 +#define DEFAULT_INTER_INDEX 3 + static UINT32 v2_dc_lum_table[512][2]; static UINT32 v2_dc_chroma_table[512][2]; static inline void msmpeg4_encode_block(MpegEncContext * s, 
DCTELEM * block, int n); static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, - int n, int coded); + int n, int coded, const uint8_t *scantable); static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr); static int msmpeg4_decode_motion(MpegEncContext * s, int *mx_ptr, int *my_ptr); @@ -63,6 +65,7 @@ static inline void msmpeg4_memsetw(short *tab, int val, int n); static int get_size_of_code(MpegEncContext * s, RLTable *rl, int last, int run, int level, int intra); static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); +static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); extern UINT32 inverse[256]; @@ -160,13 +163,14 @@ static void common_init(MpegEncContext * s) } break; case 4: + case 5: s->y_dc_scale_table= wmv1_y_dc_scale_table; s->c_dc_scale_table= wmv1_c_dc_scale_table; break; } - if(s->msmpeg4_version==4){ + if(s->msmpeg4_version>=4){ ff_init_scantable(s, &s->intra_scantable , wmv1_scantable[1]); ff_init_scantable(s, &s->intra_h_scantable, wmv1_scantable[2]); ff_init_scantable(s, &s->intra_v_scantable, wmv1_scantable[3]); @@ -370,9 +374,9 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number) s->per_mb_rl_table = 0; if(s->msmpeg4_version==4) s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE && s->pict_type==P_TYPE); +//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height); if (s->pict_type == I_TYPE) { - s->no_rounding = 1; s->slice_height= s->mb_height/1; put_bits(&s->pb, 5, 0x16 + s->mb_height/s->slice_height); @@ -404,12 +408,6 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number) put_bits(&s->pb, 1, s->mv_table_index); } - - if(s->flipflop_rounding){ - s->no_rounding ^= 1; - }else{ - s->no_rounding = 0; - } } s->esc3_level_length= 0; @@ -923,7 +921,7 @@ static inline void 
msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int } /* recalculate block_last_index for M$ wmv1 */ - if(s->msmpeg4_version==4 && s->block_last_index[n]>0){ + if(s->msmpeg4_version>=4 && s->block_last_index[n]>0){ for(last_index=63; last_index>=0; last_index--){ if(block[scantable[last_index]]) break; } @@ -975,7 +973,7 @@ else /* third escape */ put_bits(&s->pb, 1, 0); put_bits(&s->pb, 1, last); - if(s->msmpeg4_version==4){ + if(s->msmpeg4_version>=4){ if(s->esc3_level_length==0){ s->esc3_level_length=8; s->esc3_run_length= 6; @@ -1014,7 +1012,7 @@ else /****************************************/ /* decoding stuff */ -static VLC mb_non_intra_vlc; +static VLC mb_non_intra_vlc[4]; static VLC mb_intra_vlc; static VLC dc_lum_vlc[2]; static VLC dc_chroma_vlc[2]; @@ -1139,9 +1137,12 @@ int ff_msmpeg4_decode_init(MpegEncContext *s) &mvtab[0][1], 2, 1, &mvtab[0][0], 2, 1); - init_vlc(&mb_non_intra_vlc, MB_NON_INTRA_VLC_BITS, 128, - &table_mb_non_intra[0][1], 8, 4, - &table_mb_non_intra[0][0], 8, 4); + for(i=0; i<4; i++){ + init_vlc(&mb_non_intra_vlc[i], MB_NON_INTRA_VLC_BITS, 128, + &wmv2_inter_table[i][0][1], 8, 4, + &wmv2_inter_table[i][0][0], 8, 4); //FIXME name? 
+ } + init_vlc(&mb_intra_vlc, MB_INTRA_VLC_BITS, 64, &table_mb_intra[0][1], 4, 2, &table_mb_intra[0][0], 4, 2); @@ -1167,6 +1168,9 @@ int ff_msmpeg4_decode_init(MpegEncContext *s) case 4: s->decode_mb= msmpeg4v34_decode_mb; break; + case 5: + s->decode_mb= wmv2_decode_mb; + break; } s->slice_height= s->mb_height; //to avoid 1/0 if the first frame isnt a keyframe @@ -1334,6 +1338,7 @@ return -1; s->no_rounding = 0; } } +//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height); s->esc3_level_length= 0; s->esc3_run_length= 0; @@ -1523,7 +1528,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) } for (i = 0; i < 6; i++) { - if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0) + if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) { fprintf(stderr,"\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); return -1; @@ -1566,7 +1571,7 @@ printf("S "); } } - code = get_vlc2(&s->gb, mb_non_intra_vlc.table, MB_NON_INTRA_VLC_BITS, 3); + code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3); if (code < 0) return -1; //s->mb_intra = (code & 0x40) ? 0 : 1; @@ -1628,7 +1633,7 @@ printf("%c", s->ac_pred ? 'A' : 'I'); } for (i = 0; i < 6; i++) { - if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0) + if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) { fprintf(stderr,"\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); return -1; @@ -1639,13 +1644,12 @@ printf("%c", s->ac_pred ? 
'A' : 'I'); } //#define ERROR_DETAILS static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, - int n, int coded) + int n, int coded, const uint8_t *scan_table) { int level, i, last, run, run_diff; int dc_pred_dir; RLTable *rl; RL_VLC_ELEM *rl_vlc; - const UINT8 *scan_table; int qmul, qadd; if (s->mb_intra) { @@ -1713,7 +1717,8 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, s->block_last_index[n] = i; return 0; } - scan_table = s->inter_scantable.permutated; + if(!scan_table) + scan_table = s->inter_scantable.permutated; set_stat(ST_INTER_AC); rl_vlc= rl->rl_vlc[s->qscale]; } @@ -1889,7 +1894,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block, i = 63; /* XXX: not optimal */ } } - if(s->msmpeg4_version==4 && i>0) i=63; //FIXME/XXX optimize + if(s->msmpeg4_version>=4 && i>0) i=63; //FIXME/XXX optimize s->block_last_index[n] = i; return 0; @@ -1990,3 +1995,9 @@ static int msmpeg4_decode_motion(MpegEncContext * s, *my_ptr = my; return 0; } + +/* cleanest way to support it + * there is too much shared between versions so that we cant have 1 file per version & 1 common + * as allmost everything would be in the common file + */ +#include "wmv2.c" diff --git a/libavcodec/msmpeg4data.h b/libavcodec/msmpeg4data.h index 93a72c54c1..3490fc08c4 100644 --- a/libavcodec/msmpeg4data.h +++ b/libavcodec/msmpeg4data.h @@ -3,7 +3,7 @@ */ /* intra picture macro block coded block pattern */ -static const UINT16 table_mb_intra[64][2] = { +static const uint16_t table_mb_intra[64][2] = { { 0x1, 1 },{ 0x17, 6 },{ 0x9, 5 },{ 0x5, 5 }, { 0x6, 5 },{ 0x47, 9 },{ 0x20, 7 },{ 0x10, 7 }, { 0x2, 5 },{ 0x7c, 9 },{ 0x3a, 7 },{ 0x1d, 7 }, @@ -23,7 +23,7 @@ static const UINT16 table_mb_intra[64][2] = { }; /* non intra picture macro block coded block pattern + mb type */ -static const UINT32 table_mb_non_intra[128][2] = { +static const uint32_t table_mb_non_intra[128][2] = { { 0x40, 7 },{ 0x13c9, 13 },{ 0x9fd, 12 },{ 0x1fc, 
15 }, { 0x9fc, 12 },{ 0xa83, 18 },{ 0x12d34, 17 },{ 0x83bc, 16 }, { 0x83a, 12 },{ 0x7f8, 17 },{ 0x3fd, 16 },{ 0x3ff, 16 }, @@ -60,7 +60,7 @@ static const UINT32 table_mb_non_intra[128][2] = { /* dc table 0 */ -static const UINT32 table0_dc_lum[120][2] = { +static const uint32_t table0_dc_lum[120][2] = { { 0x1, 1 },{ 0x1, 2 },{ 0x1, 4 },{ 0x1, 5 }, { 0x5, 5 },{ 0x7, 5 },{ 0x8, 6 },{ 0xc, 6 }, { 0x0, 7 },{ 0x2, 7 },{ 0x12, 7 },{ 0x1a, 7 }, @@ -93,7 +93,7 @@ static const UINT32 table0_dc_lum[120][2] = { { 0x6078c, 24 },{ 0x6078d, 24 },{ 0x6078e, 24 },{ 0x6078f, 24 }, }; -static const UINT32 table0_dc_chroma[120][2] = { +static const uint32_t table0_dc_chroma[120][2] = { { 0x0, 2 },{ 0x1, 2 },{ 0x5, 3 },{ 0x9, 4 }, { 0xd, 4 },{ 0x11, 5 },{ 0x1d, 5 },{ 0x1f, 5 }, { 0x21, 6 },{ 0x31, 6 },{ 0x38, 6 },{ 0x33, 6 }, @@ -128,7 +128,7 @@ static const UINT32 table0_dc_chroma[120][2] = { /* dc table 1 */ -static const UINT32 table1_dc_lum[120][2] = { +static const uint32_t table1_dc_lum[120][2] = { { 0x2, 2 },{ 0x3, 2 },{ 0x3, 3 },{ 0x2, 4 }, { 0x5, 4 },{ 0x1, 5 },{ 0x3, 5 },{ 0x8, 5 }, { 0x0, 6 },{ 0x5, 6 },{ 0xd, 6 },{ 0xf, 6 }, @@ -161,7 +161,7 @@ static const UINT32 table1_dc_lum[120][2] = { { 0x1e6964, 26 },{ 0x1e6965, 26 },{ 0x1e6966, 26 },{ 0x1e6967, 26 }, }; -static const UINT32 table1_dc_chroma[120][2] = { +static const uint32_t table1_dc_chroma[120][2] = { { 0x0, 2 },{ 0x1, 2 },{ 0x4, 3 },{ 0x7, 3 }, { 0xb, 4 },{ 0xd, 4 },{ 0x15, 5 },{ 0x28, 6 }, { 0x30, 6 },{ 0x32, 6 },{ 0x52, 7 },{ 0x62, 7 }, @@ -196,7 +196,7 @@ static const UINT32 table1_dc_chroma[120][2] = { /* vlc table 0, for intra luma */ -static const UINT16 table0_vlc[133][2] = { +static const uint16_t table0_vlc[133][2] = { { 0x1, 2 },{ 0x6, 3 },{ 0xf, 4 },{ 0x16, 5 }, { 0x20, 6 },{ 0x18, 7 },{ 0x8, 8 },{ 0x9a, 8 }, { 0x56, 9 },{ 0x13e, 9 },{ 0xf0, 10 },{ 0x3a5, 10 }, @@ -233,7 +233,7 @@ static const UINT16 table0_vlc[133][2] = { { 0x16, 7 }, }; -static const INT8 table0_level[132] = { +static const int8_t 
table0_level[132] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1, 2, 3, 4, 5, 6, 7, 8, @@ -253,7 +253,7 @@ static const INT8 table0_level[132] = { 1, 1, 1, 1, }; -static const INT8 table0_run[132] = { +static const int8_t table0_run[132] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, @@ -275,7 +275,7 @@ static const INT8 table0_run[132] = { /* vlc table 1, for intra chroma and P macro blocks */ -static const UINT16 table1_vlc[149][2] = { +static const uint16_t table1_vlc[149][2] = { { 0x4, 3 },{ 0x14, 5 },{ 0x17, 7 },{ 0x7f, 8 }, { 0x154, 9 },{ 0x1f2, 10 },{ 0xbf, 11 },{ 0x65, 12 }, { 0xaaa, 12 },{ 0x630, 13 },{ 0x1597, 13 },{ 0x3b7, 14 }, @@ -316,7 +316,7 @@ static const UINT16 table1_vlc[149][2] = { { 0xd, 9 }, }; -static const INT8 table1_level[148] = { +static const int8_t table1_level[148] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, @@ -338,7 +338,7 @@ static const INT8 table1_level[148] = { 1, 1, 1, 1, }; -static const INT8 table1_run[148] = { +static const int8_t table1_run[148] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, @@ -362,7 +362,7 @@ static const INT8 table1_run[148] = { /* third vlc table */ -static const UINT16 table2_vlc[186][2] = { +static const uint16_t table2_vlc[186][2] = { { 0x1, 2 },{ 0x5, 3 },{ 0xd, 4 },{ 0x12, 5 }, { 0xe, 6 },{ 0x15, 7 },{ 0x13, 8 },{ 0x3f, 8 }, { 0x4b, 9 },{ 0x11f, 9 },{ 0xb8, 10 },{ 0x3e3, 10 }, @@ -412,7 +412,7 @@ static const UINT16 table2_vlc[186][2] = { { 0x23dc, 14 },{ 0x4a, 9 }, }; -static const INT8 table2_level[185] = { +static const int8_t table2_level[185] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 1, 2, 3, 4, 5, @@ -439,7 +439,7 @@ static const INT8 table2_level[185] = { 1, }; -static const INT8 table2_run[185] = { +static const int8_t table2_run[185] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, @@ -467,7 +467,7 @@ static const INT8 
table2_run[185] = { }; /* second non intra vlc table */ -static const UINT16 table4_vlc[169][2] = { +static const uint16_t table4_vlc[169][2] = { { 0x0, 3 },{ 0x3, 4 },{ 0xb, 5 },{ 0x14, 6 }, { 0x3f, 6 },{ 0x5d, 7 },{ 0xa2, 8 },{ 0xac, 9 }, { 0x16e, 9 },{ 0x20a, 10 },{ 0x2e2, 10 },{ 0x432, 11 }, @@ -513,7 +513,7 @@ static const UINT16 table4_vlc[169][2] = { { 0x169, 9 }, }; -static const INT8 table4_level[168] = { +static const int8_t table4_level[168] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 1, @@ -537,7 +537,7 @@ static const INT8 table4_level[168] = { 1, 1, 1, 1, 1, 1, 1, 1, }; -static const INT8 table4_run[168] = { +static const int8_t table4_run[168] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, @@ -561,25 +561,25 @@ static const INT8 table4_run[168] = { 29, 30, 31, 32, 33, 34, 35, 36, }; -extern const UINT16 inter_vlc[103][2]; -extern const INT8 inter_level[102]; -extern const INT8 inter_run[102]; +extern const uint16_t inter_vlc[103][2]; +extern const int8_t inter_level[102]; +extern const int8_t inter_run[102]; -extern const UINT16 intra_vlc[103][2]; -extern const INT8 intra_level[102]; -extern const INT8 intra_run[102]; +extern const uint16_t intra_vlc[103][2]; +extern const int8_t intra_level[102]; +extern const int8_t intra_run[102]; -extern const UINT8 DCtab_lum[13][2]; -extern const UINT8 DCtab_chrom[13][2]; +extern const uint8_t DCtab_lum[13][2]; +extern const uint8_t DCtab_chrom[13][2]; -extern const UINT8 cbpy_tab[16][2]; -extern const UINT8 mvtab[33][2]; +extern const uint8_t cbpy_tab[16][2]; +extern const uint8_t mvtab[33][2]; -extern const UINT8 intra_MCBPC_code[8]; -extern const UINT8 intra_MCBPC_bits[8]; +extern const uint8_t intra_MCBPC_code[8]; +extern const uint8_t intra_MCBPC_bits[8]; -extern const UINT8 inter_MCBPC_code[25]; -extern const UINT8 inter_MCBPC_bits[25]; +extern const uint8_t inter_MCBPC_code[25]; +extern const uint8_t inter_MCBPC_bits[25]; #define 
NB_RL_TABLES 6 @@ -632,7 +632,7 @@ static RLTable rl_table[NB_RL_TABLES] = { /* motion vector table 0 */ -static const UINT16 table0_mv_code[1100] = { +static const uint16_t table0_mv_code[1100] = { 0x0001, 0x0003, 0x0005, 0x0007, 0x0003, 0x0008, 0x000c, 0x0001, 0x0002, 0x001b, 0x0006, 0x000b, 0x0015, 0x0002, 0x000e, 0x000f, 0x0014, 0x0020, 0x0022, 0x0025, 0x0027, 0x0029, 0x002d, 0x004b, @@ -773,7 +773,7 @@ static const UINT16 table0_mv_code[1100] = { 0x5f0d, 0x5f0e, 0x5f0f, 0x0000, }; -static const UINT8 table0_mv_bits[1100] = { +static const uint8_t table0_mv_bits[1100] = { 1, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, @@ -914,7 +914,7 @@ static const UINT8 table0_mv_bits[1100] = { 17, 17, 17, 8, }; -static const UINT8 table0_mvx[1099] = { +static const uint8_t table0_mvx[1099] = { 32, 32, 31, 32, 33, 31, 33, 31, 33, 32, 34, 32, 30, 32, 31, 34, 35, 32, 34, 33, 29, 33, 30, 30, @@ -1055,7 +1055,7 @@ static const UINT8 table0_mvx[1099] = { 61, 19, 19, }; -static const UINT8 table0_mvy[1099] = { +static const uint8_t table0_mvy[1099] = { 32, 31, 32, 33, 32, 31, 31, 33, 33, 34, 32, 30, 32, 35, 34, 31, 32, 29, 33, 30, 32, 34, 33, 31, @@ -1197,7 +1197,7 @@ static const UINT8 table0_mvy[1099] = { }; /* motion vector table 1 */ -static const UINT16 table1_mv_code[1100] = { +static const uint16_t table1_mv_code[1100] = { 0x0000, 0x0007, 0x0009, 0x000f, 0x000a, 0x0011, 0x001a, 0x001c, 0x0011, 0x0031, 0x0025, 0x002d, 0x002f, 0x006f, 0x0075, 0x0041, 0x004c, 0x004e, 0x005c, 0x0060, 0x0062, 0x0066, 0x0068, 0x0069, @@ -1338,7 +1338,7 @@ static const UINT16 table1_mv_code[1100] = { 0x2473, 0x26a2, 0x26a3, 0x000b, }; -static const UINT8 table1_mv_bits[1100] = { +static const uint8_t table1_mv_bits[1100] = { 2, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, @@ -1479,7 +1479,7 @@ static const UINT8 table1_mv_bits[1100] = { 15, 15, 15, 4, }; -static const UINT8 table1_mvx[1099] = { +static const uint8_t table1_mvx[1099] = { 32, 31, 32, 
31, 33, 32, 33, 33, 31, 34, 30, 32, 32, 34, 35, 32, 34, 33, 29, 30, 30, 32, 31, 31, @@ -1620,7 +1620,7 @@ static const UINT8 table1_mvx[1099] = { 0, 12, 27, }; -static const UINT8 table1_mvy[1099] = { +static const uint8_t table1_mvy[1099] = { 32, 32, 31, 31, 32, 33, 31, 33, 33, 32, 32, 30, 34, 31, 32, 29, 33, 30, 32, 33, 31, 35, 34, 30, @@ -1764,11 +1764,11 @@ static const UINT8 table1_mvy[1099] = { /* motion vector table */ typedef struct MVTable { int n; - const UINT16 *table_mv_code; - const UINT8 *table_mv_bits; - const UINT8 *table_mvx; - const UINT8 *table_mvy; - UINT16 *table_mv_index; /* encoding: convert mv to index in table_mv */ + const uint16_t *table_mv_code; + const uint8_t *table_mv_bits; + const uint8_t *table_mvx; + const uint8_t *table_mvy; + uint16_t *table_mv_index; /* encoding: convert mv to index in table_mv */ VLC vlc; /* decoding: vlc */ } MVTable; @@ -1789,29 +1789,29 @@ static MVTable mv_tables[2] = { } }; -static const UINT8 v2_mb_type[8][2] = { +static const uint8_t v2_mb_type[8][2] = { {1, 1}, {0 , 2}, {3 , 3}, {9 , 5}, {5, 4}, {0x21, 7}, {0x20, 7}, {0x11, 6}, }; -static const UINT8 v2_intra_cbpc[4][2] = { +static const uint8_t v2_intra_cbpc[4][2] = { {1, 1}, {0, 3}, {1, 3}, {1, 2}, }; -static UINT8 wmv1_y_dc_scale_table[32]={ +static uint8_t wmv1_y_dc_scale_table[32]={ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 0, 8, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21 }; -static UINT8 wmv1_c_dc_scale_table[32]={ +static uint8_t wmv1_c_dc_scale_table[32]={ // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31 0, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21,22 }; -static UINT8 old_ff_y_dc_scale_table[32]={ +static uint8_t old_ff_y_dc_scale_table[32]={ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 0, 8, 8, 8, 
8,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39 }; -static UINT8 old_ff_c_dc_scale_table[32]={ +static uint8_t old_ff_c_dc_scale_table[32]={ // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 0, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21,22 }; @@ -1819,7 +1819,7 @@ static UINT8 old_ff_c_dc_scale_table[32]={ #define WMV1_SCANTABLE_COUNT 4 -static const UINT8 wmv1_scantable00[64]= { +static const uint8_t wmv1_scantable00[64]= { 0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11, 0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28, 0x30, 0x38, 0x29, 0x21, 0x1A, 0x13, 0x0C, 0x05, @@ -1829,7 +1829,7 @@ static const UINT8 wmv1_scantable00[64]= { 0x2C, 0x25, 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x35, 0x3D, 0x3E, 0x36, 0x2E, 0x27, 0x2F, 0x37, 0x3F, }; -static const UINT8 wmv1_scantable01[64]= { +static const uint8_t wmv1_scantable01[64]= { 0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11, 0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28, 0x21, 0x30, 0x1A, 0x13, 0x0C, 0x05, 0x06, 0x0D, @@ -1839,7 +1839,7 @@ static const UINT8 wmv1_scantable01[64]= { 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3C, 0x35, 0x3D, 0x2E, 0x27, 0x2F, 0x36, 0x3E, 0x37, 0x3F, }; -static const UINT8 wmv1_scantable02[64]= { +static const uint8_t wmv1_scantable02[64]= { 0x00, 0x01, 0x08, 0x02, 0x03, 0x09, 0x10, 0x18, 0x11, 0x0A, 0x04, 0x05, 0x0B, 0x12, 0x19, 0x20, 0x28, 0x30, 0x21, 0x1A, 0x13, 0x0C, 0x06, 0x07, @@ -1849,7 +1849,7 @@ static const UINT8 wmv1_scantable02[64]= { 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3B, 0x3C, 0x35, 0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F, }; -static const UINT8 wmv1_scantable03[64]= { +static const uint8_t wmv1_scantable03[64]= { 0x00, 0x08, 0x10, 0x01, 0x18, 0x20, 0x28, 0x09, 0x02, 0x03, 0x0A, 0x11, 0x19, 0x30, 0x38, 0x29, 0x21, 0x1A, 0x12, 0x0B, 0x04, 0x05, 0x0C, 0x13, @@ -1860,14 +1860,141 @@ static const UINT8 wmv1_scantable03[64]= { 0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 
0x3F, }; -static const UINT8 *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={ +static const uint8_t *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={ wmv1_scantable00, wmv1_scantable01, wmv1_scantable02, wmv1_scantable03, }; -static UINT8 table_inter_intra[4][2]={ +static const uint8_t table_inter_intra[4][2]={ {0,1},{2,2},{6,3},{7,3} }; +#define WMV2_INTER_CBP_TABLE_COUNT 4 + +static const uint32_t table_mb_non_intra2[128][2] = { +{0x0000A7, 14}, {0x01B2B8, 18}, {0x01B28E, 18}, {0x036575, 19}, +{0x006CAC, 16}, {0x000A69, 18}, {0x002934, 20}, {0x00526B, 21}, +{0x006CA1, 16}, {0x01B2B9, 18}, {0x0029AD, 20}, {0x029353, 24}, +{0x006CA7, 16}, {0x006CAB, 16}, {0x01B2BB, 18}, {0x00029B, 16}, +{0x00D944, 17}, {0x000A6A, 18}, {0x0149A8, 23}, {0x03651F, 19}, +{0x006CAF, 16}, {0x000A4C, 18}, {0x03651E, 19}, {0x000A48, 18}, +{0x00299C, 20}, {0x00299F, 20}, {0x029352, 24}, {0x0029AC, 20}, +{0x000296, 16}, {0x00D946, 17}, {0x000A68, 18}, {0x000298, 16}, +{0x000527, 17}, {0x00D94D, 17}, {0x0014D7, 19}, {0x036574, 19}, +{0x000A5C, 18}, {0x01B299, 18}, {0x00299D, 20}, {0x00299E, 20}, +{0x000525, 17}, {0x000A66, 18}, {0x00A4D5, 22}, {0x00149B, 19}, +{0x000295, 16}, {0x006CAD, 16}, {0x000A49, 18}, {0x000521, 17}, +{0x006CAA, 16}, {0x00D945, 17}, {0x01B298, 18}, {0x00052F, 17}, +{0x003654, 15}, {0x006CA0, 16}, {0x000532, 17}, {0x000291, 16}, +{0x003652, 15}, {0x000520, 17}, {0x000A5D, 18}, {0x000294, 16}, +{0x00009B, 11}, {0x0006E2, 12}, {0x000028, 12}, {0x0001B0, 10}, +{0x000001, 3}, {0x000010, 8}, {0x00002F, 6}, {0x00004C, 10}, +{0x00000D, 4}, {0x000000, 10}, {0x000006, 9}, {0x000134, 12}, +{0x00000C, 4}, {0x000007, 10}, {0x000007, 9}, {0x0006E1, 12}, +{0x00000E, 5}, {0x0000DA, 9}, {0x000022, 9}, {0x000364, 11}, +{0x00000F, 4}, {0x000006, 10}, {0x00000F, 9}, {0x000135, 12}, +{0x000014, 5}, {0x0000DD, 9}, {0x000004, 9}, {0x000015, 11}, +{0x00001A, 6}, {0x0001B3, 10}, {0x000005, 10}, {0x0006E3, 12}, +{0x00000C, 5}, {0x0000B9, 8}, {0x000004, 8}, {0x0000DB, 9}, +{0x00000E, 4}, {0x00000B, 10}, 
{0x000023, 9}, {0x0006CB, 12}, +{0x000005, 6}, {0x0001B1, 10}, {0x000001, 10}, {0x0006E0, 12}, +{0x000011, 5}, {0x0000DF, 9}, {0x00000E, 9}, {0x000373, 11}, +{0x000003, 5}, {0x0000B8, 8}, {0x000006, 8}, {0x000175, 9}, +{0x000015, 5}, {0x000174, 9}, {0x000027, 9}, {0x000372, 11}, +{0x000010, 5}, {0x0000BB, 8}, {0x000005, 8}, {0x0000DE, 9}, +{0x00000F, 5}, {0x000001, 9}, {0x000012, 8}, {0x000004, 10}, +{0x000002, 3}, {0x000016, 5}, {0x000009, 4}, {0x000001, 5}, +}; + +static const uint32_t table_mb_non_intra3[128][2] = { +{0x0002A1, 10}, {0x005740, 15}, {0x01A0BF, 18}, {0x015D19, 17}, +{0x001514, 13}, {0x00461E, 15}, {0x015176, 17}, {0x015177, 17}, +{0x0011AD, 13}, {0x00682E, 16}, {0x0682F9, 20}, {0x03417D, 19}, +{0x001A36, 14}, {0x002A2D, 14}, {0x00D05E, 17}, {0x006824, 16}, +{0x001515, 13}, {0x00545C, 15}, {0x0230E9, 18}, {0x011AFA, 17}, +{0x0015D7, 13}, {0x005747, 15}, {0x008D79, 16}, {0x006825, 16}, +{0x002BA2, 14}, {0x00A8BA, 16}, {0x0235F6, 18}, {0x015D18, 17}, +{0x0011AE, 13}, {0x00346F, 15}, {0x008C3B, 16}, {0x00346E, 15}, +{0x000D1A, 13}, {0x00461F, 15}, {0x0682F8, 20}, {0x011875, 17}, +{0x002BA1, 14}, {0x008D61, 16}, {0x0235F7, 18}, {0x0230E8, 18}, +{0x001513, 13}, {0x008D7B, 16}, {0x011AF4, 17}, {0x011AF5, 17}, +{0x001185, 13}, {0x0046BF, 15}, {0x008D60, 16}, {0x008D7C, 16}, +{0x001512, 13}, {0x00461C, 15}, {0x00AE8D, 16}, {0x008D78, 16}, +{0x000D0E, 13}, {0x003413, 15}, {0x0046B1, 15}, {0x003416, 15}, +{0x000AEA, 12}, {0x002A2C, 14}, {0x005741, 15}, {0x002A2F, 14}, +{0x000158, 9}, {0x0008D2, 12}, {0x00054C, 11}, {0x000686, 12}, +{0x000000, 2}, {0x000069, 8}, {0x00006B, 8}, {0x00068C, 12}, +{0x000007, 3}, {0x00015E, 9}, {0x0002A3, 10}, {0x000AE9, 12}, +{0x000006, 3}, {0x000231, 10}, {0x0002B8, 10}, {0x001A08, 14}, +{0x000010, 5}, {0x0001A9, 10}, {0x000342, 11}, {0x000A88, 12}, +{0x000004, 4}, {0x0001A2, 10}, {0x0002A4, 10}, {0x001184, 13}, +{0x000012, 5}, {0x000232, 10}, {0x0002B2, 10}, {0x000680, 12}, +{0x00001B, 6}, {0x00046A, 11}, {0x00068E, 12}, 
{0x002359, 14}, +{0x000016, 5}, {0x00015F, 9}, {0x0002A0, 10}, {0x00054D, 11}, +{0x000005, 4}, {0x000233, 10}, {0x0002B9, 10}, {0x0015D6, 13}, +{0x000022, 6}, {0x000468, 11}, {0x000683, 12}, {0x001A0A, 14}, +{0x000013, 5}, {0x000236, 10}, {0x0002BB, 10}, {0x001186, 13}, +{0x000017, 5}, {0x0001AB, 10}, {0x0002A7, 10}, {0x0008D3, 12}, +{0x000014, 5}, {0x000237, 10}, {0x000460, 11}, {0x000D0F, 13}, +{0x000019, 6}, {0x0001AA, 10}, {0x0002B3, 10}, {0x000681, 12}, +{0x000018, 6}, {0x0001A8, 10}, {0x0002A5, 10}, {0x00068F, 12}, +{0x000007, 4}, {0x000055, 7}, {0x000047, 7}, {0x0000AD, 8}, +}; + +static const uint32_t table_mb_non_intra4[128][2] = { +{0x0000D4, 8}, {0x0021C5, 14}, {0x00F18A, 16}, {0x00D5BC, 16}, +{0x000879, 12}, {0x00354D, 14}, {0x010E3F, 17}, {0x010F54, 17}, +{0x000866, 12}, {0x00356E, 14}, {0x010F55, 17}, {0x010E3E, 17}, +{0x0010CE, 13}, {0x003C84, 14}, {0x00D5BD, 16}, {0x00F18B, 16}, +{0x000868, 12}, {0x00438C, 15}, {0x0087AB, 16}, {0x00790B, 15}, +{0x000F10, 12}, {0x00433D, 15}, {0x006AD3, 15}, {0x00790A, 15}, +{0x001AA7, 13}, {0x0043D4, 15}, {0x00871E, 16}, {0x006ADF, 15}, +{0x000D7C, 12}, {0x003C94, 14}, {0x00438D, 15}, {0x006AD2, 15}, +{0x0006BC, 11}, {0x0021E9, 14}, {0x006ADA, 15}, {0x006A99, 15}, +{0x0010F7, 13}, {0x004389, 15}, {0x006ADB, 15}, {0x0078C4, 15}, +{0x000D56, 12}, {0x0035F7, 14}, {0x00438E, 15}, {0x006A98, 15}, +{0x000D52, 12}, {0x003C95, 14}, {0x004388, 15}, {0x00433C, 15}, +{0x000D54, 12}, {0x001E4B, 13}, {0x003C63, 14}, {0x003C83, 14}, +{0x000861, 12}, {0x0021EB, 14}, {0x00356C, 14}, {0x0035F6, 14}, +{0x000863, 12}, {0x00219F, 14}, {0x003568, 14}, {0x003C82, 14}, +{0x0001AE, 9}, {0x0010C0, 13}, {0x000F11, 12}, {0x001AFA, 13}, +{0x000000, 1}, {0x0000F0, 8}, {0x0001AD, 9}, {0x0010C1, 13}, +{0x00000A, 4}, {0x0003C5, 10}, {0x000789, 11}, {0x001AB5, 13}, +{0x000009, 4}, {0x000435, 11}, {0x000793, 11}, {0x001E40, 13}, +{0x00001D, 5}, {0x0003CB, 10}, {0x000878, 12}, {0x001AAF, 13}, +{0x00000B, 4}, {0x0003C7, 10}, {0x000791, 11}, {0x001AAB, 
13}, +{0x00001F, 5}, {0x000436, 11}, {0x0006BF, 11}, {0x000F19, 12}, +{0x00003D, 6}, {0x000D51, 12}, {0x0010C4, 13}, {0x0021E8, 14}, +{0x000036, 6}, {0x000437, 11}, {0x0006AF, 11}, {0x0010C5, 13}, +{0x00000C, 4}, {0x000432, 11}, {0x000794, 11}, {0x001E30, 13}, +{0x000042, 7}, {0x000870, 12}, {0x000F24, 12}, {0x001E43, 13}, +{0x000020, 6}, {0x00043E, 11}, {0x000795, 11}, {0x001AAA, 13}, +{0x000037, 6}, {0x0006AC, 11}, {0x0006AE, 11}, {0x0010F6, 13}, +{0x000034, 6}, {0x00043A, 11}, {0x000D50, 12}, {0x001AAE, 13}, +{0x000039, 6}, {0x00043F, 11}, {0x00078D, 11}, {0x0010D2, 13}, +{0x000038, 6}, {0x00043B, 11}, {0x0006BD, 11}, {0x0010D3, 13}, +{0x000011, 5}, {0x0001AC, 9}, {0x0000F3, 8}, {0x000439, 11}, +}; + +static const uint32_t (*wmv2_inter_table[WMV2_INTER_CBP_TABLE_COUNT])[2]={ + table_mb_non_intra2, + table_mb_non_intra3, + table_mb_non_intra4, + table_mb_non_intra, +}; + +static const uint8_t wmv2_scantableA[64]={ +0x00, 0x01, 0x02, 0x08, 0x03, 0x09, 0x0A, 0x10, +0x04, 0x0B, 0x11, 0x18, 0x12, 0x0C, 0x05, 0x13, +0x19, 0x0D, 0x14, 0x1A, 0x1B, 0x06, 0x15, 0x1C, +0x0E, 0x16, 0x1D, 0x07, 0x1E, 0x0F, 0x17, 0x1F, +}; + +static const uint8_t wmv2_scantableB[64]={ +0x00, 0x08, 0x01, 0x10, 0x09, 0x18, 0x11, 0x02, +0x20, 0x0A, 0x19, 0x28, 0x12, 0x30, 0x21, 0x1A, +0x38, 0x29, 0x22, 0x03, 0x31, 0x39, 0x0B, 0x2A, +0x13, 0x32, 0x1B, 0x3A, 0x23, 0x2B, 0x33, 0x3B, +}; diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c index ad27ac5942..8c9ce7b936 100644 --- a/libavcodec/simple_idct.c +++ b/libavcodec/simple_idct.c @@ -473,3 +473,93 @@ void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block) idct4col(dest + line_size + i, 2 * line_size, block + 8 + i); } } + +/* 8x4 & 4x8 WMV2 IDCT */ +#undef CN_SHIFT +#undef C_SHIFT +#undef C_FIX +#undef C1 +#undef C2 +#define CN_SHIFT 12 +#define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5)) +#define C1 C_FIX(0.6532814824) +#define C2 C_FIX(0.2705980501) +#define C3 C_FIX(0.5) +#define C_SHIFT (4+1+12) 
+static inline void idct4col_add(UINT8 *dest, int line_size, const INT16 *col) +{ + int c0, c1, c2, c3, a0, a1, a2, a3; + const UINT8 *cm = cropTbl + MAX_NEG_CROP; + + a0 = col[8*0]; + a1 = col[8*1]; + a2 = col[8*2]; + a3 = col[8*3]; + c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1)); + c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1)); + c1 = a1 * C1 + a3 * C2; + c3 = a1 * C2 - a3 * C1; + dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)]; + dest += line_size; + dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)]; + dest += line_size; + dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)]; + dest += line_size; + dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)]; +} + +#define RN_SHIFT 15 +#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5)) +#define R1 R_FIX(0.6532814824) +#define R2 R_FIX(0.2705980501) +#define R3 R_FIX(0.5) +#define R_SHIFT 11 +static inline void idct4row(INT16 *row) +{ + int c0, c1, c2, c3, a0, a1, a2, a3; + const UINT8 *cm = cropTbl + MAX_NEG_CROP; + + a0 = row[0]; + a1 = row[1]; + a2 = row[2]; + a3 = row[3]; + c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1)); + c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1)); + c1 = a1 * R1 + a3 * R2; + c3 = a1 * R2 - a3 * R1; + row[0]= (c0 + c1) >> R_SHIFT; + row[1]= (c2 + c3) >> R_SHIFT; + row[2]= (c2 - c3) >> R_SHIFT; + row[3]= (c0 - c1) >> R_SHIFT; +} + +void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block) +{ + int i; + + /* IDCT8 on each line */ + for(i=0; i<4; i++) { + idctRowCondDC(block + i*8); + } + + /* IDCT4 and store */ + for(i=0;i<8;i++) { + idct4col_add(dest + i, line_size, block + i); + } +} + +void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block) +{ + int i; + + /* IDCT4 on each line */ + for(i=0; i<8; i++) { + idct4row(block + i*8); + } + + /* IDCT8 and store */ + for(i=0; i<4; i++){ + idctSparseColAdd(dest + i, line_size, block + i); + } +} + diff --git a/libavcodec/simple_idct.h b/libavcodec/simple_idct.h index 6c6b4f0115..428c6072c8 100644 --- a/libavcodec/simple_idct.h +++ b/libavcodec/simple_idct.h 
@@ -26,3 +26,6 @@ void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, INT16 *block); void simple_idct(short *block); void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block); + +void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block); +void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block); diff --git a/libavcodec/wmv2.c b/libavcodec/wmv2.c new file mode 100644 index 0000000000..d25b7a5f17 --- /dev/null +++ b/libavcodec/wmv2.c @@ -0,0 +1,850 @@ +/* + * Copyright (c) 2002 The FFmpeg Project. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include "simple_idct.h" + +#define SKIP_TYPE_NONE 0 +#define SKIP_TYPE_MPEG 1 +#define SKIP_TYPE_ROW 2 +#define SKIP_TYPE_COL 3 + + +typedef struct Wmv2Context{ + MpegEncContext s; + int j_type_bit; + int j_type; + int flag3; + int flag63; + int abt_flag; + int abt_type; + int abt_type_table[6]; + int per_mb_abt; + int per_block_abt; + int mspel_bit; + int cbp_table_index; + int top_left_mv_flag; + int per_mb_rl_bit; + int skip_type; + int hshift; + + ScanTable abt_scantable[2]; + DCTELEM abt_block2[6][64] __align8; +}Wmv2Context; + +static void wmv2_common_init(Wmv2Context * w){ + MpegEncContext * const s= &w->s; + + ff_init_scantable(s, &w->abt_scantable[0], wmv2_scantableA); + ff_init_scantable(s, &w->abt_scantable[1], wmv2_scantableB); +} + +static int encode_ext_header(Wmv2Context *w){ + MpegEncContext * const s= &w->s; + PutBitContext pb; + int code; + + init_put_bits(&pb, s->avctx->extradata, s->avctx->extradata_size, NULL, NULL); + + put_bits(&pb, 5, s->frame_rate / FRAME_RATE_BASE); //yes 29.97 -> 29 + put_bits(&pb, 11, FFMIN(s->bit_rate/1024, 2047)); + + put_bits(&pb, 1, w->mspel_bit=1); + put_bits(&pb, 1, w->flag3=1); + put_bits(&pb, 1, w->abt_flag=1); + put_bits(&pb, 1, w->j_type_bit=1); + put_bits(&pb, 1, w->top_left_mv_flag=0); + put_bits(&pb, 1, w->per_mb_rl_bit=1); + put_bits(&pb, 3, code=1); + + flush_put_bits(&pb); + + s->slice_height = s->mb_height / code; + + return 0; +} + +static int wmv2_encode_init(AVCodecContext *avctx){ + Wmv2Context * const w= avctx->priv_data; + + if(MPV_encode_init(avctx) < 0) + return -1; + + wmv2_common_init(w); + + avctx->extradata_size= 4; + avctx->extradata= av_mallocz(avctx->extradata_size + 10); + encode_ext_header(w); + + return 0; +} + +static int wmv2_encode_end(AVCodecContext 
*avctx){ + Wmv2Context * const w= avctx->priv_data; + + if(MPV_encode_end(avctx) < 0) + return -1; + + avctx->extradata_size= 0; + av_freep(&avctx->extradata); + + return 0; +} + +int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number) +{ + Wmv2Context * const w= (Wmv2Context*)s; + + put_bits(&s->pb, 1, s->pict_type - 1); + if(s->pict_type == I_TYPE){ + put_bits(&s->pb, 7, 0); + } + put_bits(&s->pb, 5, s->qscale); + + s->dc_table_index = 1; + s->mv_table_index = 1; /* only if P frame */ +// s->use_skip_mb_code = 1; /* only if P frame */ + s->per_mb_rl_table = 0; + s->mspel= 0; + w->per_mb_abt=0; + w->abt_type=0; + w->j_type=0; + + if (s->pict_type == I_TYPE) { + if(w->j_type_bit) put_bits(&s->pb, 1, w->j_type); + + if(w->per_mb_rl_bit) put_bits(&s->pb, 1, s->per_mb_rl_table); + + if(!s->per_mb_rl_table){ + code012(&s->pb, s->rl_chroma_table_index); + code012(&s->pb, s->rl_table_index); + } + + put_bits(&s->pb, 1, s->dc_table_index); + + s->inter_intra_pred= 0; + s->no_rounding = 1; + }else{ + int cbp_index; + + put_bits(&s->pb, 2, SKIP_TYPE_NONE); + + code012(&s->pb, cbp_index=0); + if(s->qscale <= 10){ + int map[3]= {0,2,1}; + w->cbp_table_index= map[cbp_index]; + }else if(s->qscale <= 20){ + int map[3]= {1,0,2}; + w->cbp_table_index= map[cbp_index]; + }else{ + int map[3]= {2,1,0}; + w->cbp_table_index= map[cbp_index]; + } + + if(w->mspel_bit) put_bits(&s->pb, 1, s->mspel); + + if(w->abt_flag){ + put_bits(&s->pb, 1, w->per_mb_abt^1); + if(!w->per_mb_abt){ + code012(&s->pb, w->abt_type); + } + } + + if(w->per_mb_rl_bit) put_bits(&s->pb, 1, s->per_mb_rl_table); + + if(!s->per_mb_rl_table){ + code012(&s->pb, s->rl_table_index); + s->rl_chroma_table_index = s->rl_table_index; + } + put_bits(&s->pb, 1, s->dc_table_index); + put_bits(&s->pb, 1, s->mv_table_index); + + s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE); + s->no_rounding ^= 1; + } + s->esc3_level_length= 0; + s->esc3_run_length= 0; + + return 0; +} + +// 
nearly idential to wmv1 but thats just because we dont use the useless M$ crap features +// its duplicated here in case someone wants to add support for these carp features +void ff_wmv2_encode_mb(MpegEncContext * s, + DCTELEM block[6][64], + int motion_x, int motion_y) +{ + Wmv2Context * const w= (Wmv2Context*)s; + int cbp, coded_cbp, i; + int pred_x, pred_y; + UINT8 *coded_block; + + handle_slices(s); + + if (!s->mb_intra) { + /* compute cbp */ + set_stat(ST_INTER_MB); + cbp = 0; + for (i = 0; i < 6; i++) { + if (s->block_last_index[i] >= 0) + cbp |= 1 << (5 - i); + } + + put_bits(&s->pb, + wmv2_inter_table[w->cbp_table_index][cbp + 64][1], + wmv2_inter_table[w->cbp_table_index][cbp + 64][0]); + + /* motion vector */ + h263_pred_motion(s, 0, &pred_x, &pred_y); + msmpeg4_encode_motion(s, motion_x - pred_x, + motion_y - pred_y); + } else { + /* compute cbp */ + cbp = 0; + coded_cbp = 0; + for (i = 0; i < 6; i++) { + int val, pred; + val = (s->block_last_index[i] >= 1); + cbp |= val << (5 - i); + if (i < 4) { + /* predict value for close blocks only for luma */ + pred = coded_block_pred(s, i, &coded_block); + *coded_block = val; + val = val ^ pred; + } + coded_cbp |= val << (5 - i); + } +#if 0 + if (coded_cbp) + printf("cbp=%x %x\n", cbp, coded_cbp); +#endif + + if (s->pict_type == I_TYPE) { + set_stat(ST_INTRA_MB); + put_bits(&s->pb, + table_mb_intra[coded_cbp][1], table_mb_intra[coded_cbp][0]); + } else { + put_bits(&s->pb, + wmv2_inter_table[w->cbp_table_index][cbp][1], + wmv2_inter_table[w->cbp_table_index][cbp][0]); + } + set_stat(ST_INTRA_MB); + put_bits(&s->pb, 1, 0); /* no AC prediction yet */ + if(s->inter_intra_pred){ + s->h263_aic_dir=0; + put_bits(&s->pb, table_inter_intra[s->h263_aic_dir][1], table_inter_intra[s->h263_aic_dir][0]); + } + } + + for (i = 0; i < 6; i++) { + msmpeg4_encode_block(s, block[i], i); + } +} + +static void parse_mb_skip(Wmv2Context * w){ + int mb_x, mb_y; + MpegEncContext * const s= &w->s; + + w->skip_type= get_bits(&s->gb, 2); + 
switch(w->skip_type){ + case SKIP_TYPE_NONE: + for(mb_y=0; mb_y<s->mb_height; mb_y++){ + for(mb_x=0; mb_x<s->mb_width; mb_x++){ + s->mb_type[mb_y*s->mb_width + mb_x]= 0; + } + } + break; + case SKIP_TYPE_MPEG: + for(mb_y=0; mb_y<s->mb_height; mb_y++){ + for(mb_x=0; mb_x<s->mb_width; mb_x++){ + s->mb_type[mb_y*s->mb_width + mb_x]= get_bits1(&s->gb) ? MB_TYPE_SKIPED : 0; + } + } + break; + case SKIP_TYPE_ROW: + for(mb_y=0; mb_y<s->mb_height; mb_y++){ + if(get_bits1(&s->gb)){ + for(mb_x=0; mb_x<s->mb_width; mb_x++){ + s->mb_type[mb_y*s->mb_width + mb_x]= MB_TYPE_SKIPED; + } + }else{ + for(mb_x=0; mb_x<s->mb_width; mb_x++){ + s->mb_type[mb_y*s->mb_width + mb_x]= get_bits1(&s->gb) ? MB_TYPE_SKIPED : 0; + } + } + } + break; + case SKIP_TYPE_COL: + for(mb_x=0; mb_x<s->mb_width; mb_x++){ + if(get_bits1(&s->gb)){ + for(mb_y=0; mb_y<s->mb_height; mb_y++){ + s->mb_type[mb_y*s->mb_width + mb_x]= MB_TYPE_SKIPED; + } + }else{ + for(mb_y=0; mb_y<s->mb_height; mb_y++){ + s->mb_type[mb_y*s->mb_width + mb_x]= get_bits1(&s->gb) ? 
MB_TYPE_SKIPED : 0; + } + } + } + break; + } +} + +static int decode_ext_header(Wmv2Context *w){ + MpegEncContext * const s= &w->s; + GetBitContext gb; + int fps; + int code; + + if(s->avctx->extradata_size<4) return -1; + + init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size); + + fps = get_bits(&gb, 5); + s->bit_rate = get_bits(&gb, 11)*1024; + w->mspel_bit = get_bits1(&gb); + w->flag3 = get_bits1(&gb); + w->abt_flag = get_bits1(&gb); + w->j_type_bit = get_bits1(&gb); + w->top_left_mv_flag= get_bits1(&gb); + w->per_mb_rl_bit = get_bits1(&gb); + code = get_bits(&gb, 3); + + if(code==0) return -1; + + s->slice_height = s->mb_height / code; + + if(s->avctx->debug&FF_DEBUG_PICT_INFO){ + printf("fps:%d, br:%d, qpbit:%d, abt_flag:%d, j_type_bit:%d, tl_mv_flag:%d, mbrl_bit:%d, code:%d, flag3:%d\n", + fps, s->bit_rate, w->mspel_bit, w->abt_flag, w->j_type_bit, w->top_left_mv_flag, w->per_mb_rl_bit, code, w->flag3); + } + return 0; +} + +int ff_wmv2_decode_picture_header(MpegEncContext * s) +{ + Wmv2Context * const w= (Wmv2Context*)s; + int code, i; + +#if 0 +{ +int i; +for(i=0; i<s->gb.size*8; i++) + printf("%d", get_bits1(&s->gb)); +// get_bits1(&s->gb); +printf("END\n"); +return -1; +} +#endif + if(s->picture_number==0) + decode_ext_header(w); + + s->pict_type = get_bits(&s->gb, 1) + 1; + if(s->pict_type == I_TYPE){ + code = get_bits(&s->gb, 7); + printf("I7:%X/\n", code); + } + s->qscale = get_bits(&s->gb, 5); + + if (s->pict_type == I_TYPE) { + if(w->j_type_bit) w->j_type= get_bits1(&s->gb); + else w->j_type= 0; //FIXME check + + if(!w->j_type){ + if(w->per_mb_rl_bit) s->per_mb_rl_table= get_bits1(&s->gb); + else s->per_mb_rl_table= 0; + + if(!s->per_mb_rl_table){ + s->rl_chroma_table_index = decode012(&s->gb); + s->rl_table_index = decode012(&s->gb); + } + + s->dc_table_index = get_bits1(&s->gb); + } + s->inter_intra_pred= 0; + s->no_rounding = 1; + if(s->avctx->debug&FF_DEBUG_PICT_INFO){ + printf("qscale:%d rlc:%d rl:%d dc:%d mbrl:%d j_type:%d \n", + 
s->qscale, + s->rl_chroma_table_index, + s->rl_table_index, + s->dc_table_index, + s->per_mb_rl_table, + w->j_type); + } + }else{ + int cbp_index; + w->j_type=0; + + parse_mb_skip(w); + cbp_index= decode012(&s->gb); + if(s->qscale <= 10){ + int map[3]= {0,2,1}; + w->cbp_table_index= map[cbp_index]; + }else if(s->qscale <= 20){ + int map[3]= {1,0,2}; + w->cbp_table_index= map[cbp_index]; + }else{ + int map[3]= {2,1,0}; + w->cbp_table_index= map[cbp_index]; + } + + if(w->mspel_bit) s->mspel= get_bits1(&s->gb); + else s->mspel= 0; //FIXME check + + if(w->abt_flag){ + w->per_mb_abt= get_bits1(&s->gb)^1; + if(!w->per_mb_abt){ + w->abt_type= decode012(&s->gb); + } + } + + if(w->per_mb_rl_bit) s->per_mb_rl_table= get_bits1(&s->gb); + else s->per_mb_rl_table= 0; + + if(!s->per_mb_rl_table){ + s->rl_table_index = decode012(&s->gb); + s->rl_chroma_table_index = s->rl_table_index; + } + + s->dc_table_index = get_bits1(&s->gb); + s->mv_table_index = get_bits1(&s->gb); + + s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE); + s->no_rounding ^= 1; + + if(s->avctx->debug&FF_DEBUG_PICT_INFO){ + printf("rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d mspel:%d per_mb_abt:%d abt_type:%d cbp:%d ii:%d\n", + s->rl_table_index, + s->rl_chroma_table_index, + s->dc_table_index, + s->mv_table_index, + s->per_mb_rl_table, + s->qscale, + s->mspel, + w->per_mb_abt, + w->abt_type, + w->cbp_table_index, + s->inter_intra_pred); + } + } + s->esc3_level_length= 0; + s->esc3_run_length= 0; + + if(s->avctx->debug&FF_DEBUG_SKIP){ + for(i=0; i<s->mb_num; i++){ + if(i%s->mb_width==0) printf("\n"); + printf("%d", s->mb_type[i]); + } + } +s->picture_number++; //FIXME ? 
+ + +// if(w->j_type) +// return wmv2_decode_j_picture(w); //FIXME + + if(w->j_type){ + printf("J-type picture isnt supported\n"); + return -1; + } + + return 0; +} + +void ff_wmv2_decode_init(MpegEncContext *s){ +} + +static inline int wmv2_decode_motion(Wmv2Context *w, int *mx_ptr, int *my_ptr){ + MpegEncContext * const s= &w->s; + int ret; + + ret= msmpeg4_decode_motion(s, mx_ptr, my_ptr); + + if(ret<0) return -1; + + if((((*mx_ptr)|(*my_ptr)) & 1) && s->mspel) + w->hshift= get_bits1(&s->gb); + else + w->hshift= 0; + +//printf("%d %d ", *mx_ptr, *my_ptr); + + return 0; +} + +static int16_t *wmv2_pred_motion(Wmv2Context *w, int *px, int *py){ + MpegEncContext * const s= &w->s; + int xy, wrap, diff, type; + INT16 *A, *B, *C, *mot_val; + + wrap = s->block_wrap[0]; + xy = s->block_index[0]; + + mot_val = s->motion_val[xy]; + + A = s->motion_val[xy - 1]; + B = s->motion_val[xy - wrap]; + C = s->motion_val[xy + 2 - wrap]; + + diff= FFMAX(ABS(A[0] - B[0]), ABS(A[1] - B[1])); + + if(s->mb_x && s->mb_y && !s->mspel && w->top_left_mv_flag && diff >= 8) + //FIXME top/left bit too if y=!0 && first_slice_line? 
+ type= get_bits1(&s->gb); + else + type= 2; + + if(type == 0){ + *px= A[0]; + *py= A[1]; + }else if(type == 1){ + *px= B[0]; + *py= B[1]; + }else{ + /* special case for first (slice) line */ + if (s->first_slice_line) { + *px = A[0]; + *py = A[1]; + } else { + *px = mid_pred(A[0], B[0], C[0]); + *py = mid_pred(A[1], B[1], C[1]); + } + } + + return mot_val; +} + +static inline int wmv2_decode_inter_block(Wmv2Context *w, DCTELEM *block, int n, int cbp){ + MpegEncContext * const s= &w->s; + static const int sub_cbp_table[3]= {2,3,1}; + int sub_cbp; + + if(!cbp){ + s->block_last_index[n] = -1; + + return 0; + } + + if(w->per_block_abt) + w->abt_type= decode012(&s->gb); +#if 0 + if(w->per_block_abt) + printf("B%d", w->abt_type); +#endif + w->abt_type_table[n]= w->abt_type; + + if(w->abt_type){ +// const uint8_t *scantable= w->abt_scantable[w->abt_type-1].permutated; + const uint8_t *scantable= w->abt_scantable[w->abt_type-1].scantable; +// const uint8_t *scantable= w->abt_type-1 ? w->abt_scantable[1].permutated : w->abt_scantable[0].scantable; + + sub_cbp= sub_cbp_table[ decode012(&s->gb) ]; +// printf("S%d", sub_cbp); + + if(sub_cbp&1){ + if (msmpeg4_decode_block(s, block, n, 1, scantable) < 0) + return -1; + } + + if(sub_cbp&2){ + if (msmpeg4_decode_block(s, w->abt_block2[n], n, 1, scantable) < 0) + return -1; + } + s->block_last_index[n] = 63; + + return 0; + }else{ + return msmpeg4_decode_block(s, block, n, 1, s->inter_scantable.permutated); + } +} + +static void wmv2_add_block(Wmv2Context *w, DCTELEM *block1, uint8_t *dst, int stride, int n){ + MpegEncContext * const s= &w->s; + uint8_t temp[2][64]; + int i; + + if(w->abt_type_table[n] && 0){ + int a,b; + + a= block1[0]; + b= w->abt_block2[n][0]; + block1[0]= a+b; + w->abt_block2[n][0]= a-b; + } + + switch(w->abt_type_table[n]){ + case 0: + if (s->block_last_index[n] >= 0) { + s->idct_add (dst, stride, block1); + } + break; + case 1: + simple_idct84_add(dst , stride, block1); + simple_idct84_add(dst + 4*stride, 
stride, w->abt_block2[n]); + memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM)); + break; + case 2: + simple_idct48_add(dst , stride, block1); + simple_idct48_add(dst + 4 , stride, w->abt_block2[n]); + memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM)); + break; + default: + fprintf(stderr, "internal error in WMV2 abt\n"); + } +} + +void ff_wmv2_add_mb(MpegEncContext *s, DCTELEM block1[6][64], uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr){ + Wmv2Context * const w= (Wmv2Context*)s; + + wmv2_add_block(w, block1[0], dest_y , s->linesize, 0); + wmv2_add_block(w, block1[1], dest_y + 8 , s->linesize, 1); + wmv2_add_block(w, block1[2], dest_y + 8*s->linesize, s->linesize, 2); + wmv2_add_block(w, block1[3], dest_y + 8 + 8*s->linesize, s->linesize, 3); + + if(s->flags&CODEC_FLAG_GRAY) return; + + wmv2_add_block(w, block1[4], dest_cb , s->uvlinesize, 4); + wmv2_add_block(w, block1[5], dest_cr , s->uvlinesize, 5); +} + +void ff_mspel_motion(MpegEncContext *s, + UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr, + UINT8 **ref_picture, op_pixels_func (*pix_op)[4], + int motion_x, int motion_y, int h) +{ + Wmv2Context * const w= (Wmv2Context*)s; + UINT8 *ptr; + int dxy, offset, mx, my, src_x, src_y, v_edge_pos, linesize, uvlinesize; + int emu=0; + + dxy = ((motion_y & 1) << 1) | (motion_x & 1); + dxy = 2*dxy + w->hshift; + src_x = s->mb_x * 16 + (motion_x >> 1); + src_y = s->mb_y * 16 + (motion_y >> 1); + + /* WARNING: do no forget half pels */ + v_edge_pos = s->v_edge_pos; + src_x = clip(src_x, -16, s->width); + src_y = clip(src_y, -16, s->height); + linesize = s->linesize; + uvlinesize = s->uvlinesize; + ptr = ref_picture[0] + (src_y * linesize) + src_x; + + if(s->flags&CODEC_FLAG_EMU_EDGE){ + if(src_x<1 || src_y<1 || src_x + 17 >= s->h_edge_pos + || src_y + h+1 >= v_edge_pos){ + ff_emulated_edge_mc(s, ptr - 1 - s->linesize, s->linesize, 19, 19, + src_x-1, src_y-1, s->h_edge_pos, s->v_edge_pos); + ptr= s->edge_emu_buffer + 1 + s->linesize; + emu=1; + } + } + + 
s->dsp.put_mspel_pixels_tab[dxy](dest_y , ptr , linesize); + s->dsp.put_mspel_pixels_tab[dxy](dest_y+8 , ptr+8 , linesize); + s->dsp.put_mspel_pixels_tab[dxy](dest_y +8*linesize, ptr +8*linesize, linesize); + s->dsp.put_mspel_pixels_tab[dxy](dest_y+8+8*linesize, ptr+8+8*linesize, linesize); + + if(s->flags&CODEC_FLAG_GRAY) return; + + if (s->out_format == FMT_H263) { + dxy = 0; + if ((motion_x & 3) != 0) + dxy |= 1; + if ((motion_y & 3) != 0) + dxy |= 2; + mx = motion_x >> 2; + my = motion_y >> 2; + } else { + mx = motion_x / 2; + my = motion_y / 2; + dxy = ((my & 1) << 1) | (mx & 1); + mx >>= 1; + my >>= 1; + } + + src_x = s->mb_x * 8 + mx; + src_y = s->mb_y * 8 + my; + src_x = clip(src_x, -8, s->width >> 1); + if (src_x == (s->width >> 1)) + dxy &= ~1; + src_y = clip(src_y, -8, s->height >> 1); + if (src_y == (s->height >> 1)) + dxy &= ~2; + offset = (src_y * uvlinesize) + src_x; + ptr = ref_picture[1] + offset; + if(emu){ + ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, + src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); + ptr= s->edge_emu_buffer; + } + pix_op[1][dxy](dest_cb, ptr, uvlinesize, h >> 1); + + ptr = ref_picture[2] + offset; + if(emu){ + ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, + src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1); + ptr= s->edge_emu_buffer; + } + pix_op[1][dxy](dest_cr, ptr, uvlinesize, h >> 1); +} + + +static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]) +{ + Wmv2Context * const w= (Wmv2Context*)s; + int cbp, code, i; + UINT8 *coded_val; + + if(w->j_type) return 0; + + s->error_status_table[s->mb_x + s->mb_y*s->mb_width]= 0; + + if (s->pict_type == P_TYPE) { + if(s->mb_type[s->mb_y * s->mb_width + s->mb_x]&MB_TYPE_SKIPED){ + /* skip mb */ + s->mb_intra = 0; + for(i=0;i<6;i++) + s->block_last_index[i] = -1; + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->mv[0][0][0] = 0; + s->mv[0][0][1] = 0; + s->mb_skiped = 1; + return 0; + } + + code = get_vlc2(&s->gb, 
mb_non_intra_vlc[w->cbp_table_index].table, MB_NON_INTRA_VLC_BITS, 3); + if (code < 0) + return -1; + s->mb_intra = (~code & 0x40) >> 6; + + cbp = code & 0x3f; + } else { + s->mb_intra = 1; + code = get_vlc2(&s->gb, mb_intra_vlc.table, MB_INTRA_VLC_BITS, 2); + if (code < 0){ + fprintf(stderr, "II-cbp illegal at %d %d\n", s->mb_x, s->mb_y); + return -1; + } + /* predict coded block pattern */ + cbp = 0; + for(i=0;i<6;i++) { + int val = ((code >> (5 - i)) & 1); + if (i < 4) { + int pred = coded_block_pred(s, i, &coded_val); + val = val ^ pred; + *coded_val = val; + } + cbp |= val << (5 - i); + } + } + + if (!s->mb_intra) { + int mx, my; +//printf("P at %d %d\n", s->mb_x, s->mb_y); + wmv2_pred_motion(w, &mx, &my); + + if(cbp){ + if(s->per_mb_rl_table){ + s->rl_table_index = decode012(&s->gb); + s->rl_chroma_table_index = s->rl_table_index; + } + + if(w->abt_flag && w->per_mb_abt){ + w->per_block_abt= get_bits1(&s->gb); + if(!w->per_block_abt) + w->abt_type= decode012(&s->gb); + }else + w->per_block_abt=0; + } + + if (wmv2_decode_motion(w, &mx, &my) < 0) + return -1; + + s->mv_dir = MV_DIR_FORWARD; + s->mv_type = MV_TYPE_16X16; + s->mv[0][0][0] = mx; + s->mv[0][0][1] = my; + + for (i = 0; i < 6; i++) { + if (wmv2_decode_inter_block(w, block[i], i, (cbp >> (5 - i)) & 1) < 0) + { + fprintf(stderr,"\nerror while decoding inter block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); + return -1; + } + } + } else { +//if(s->pict_type==P_TYPE) +// printf("%d%d ", s->inter_intra_pred, cbp); +//printf("I at %d %d %d %06X\n", s->mb_x, s->mb_y, ((cbp&3)? 1 : 0) +((cbp&0x3C)? 
2 : 0), show_bits(&s->gb, 24)); + s->ac_pred = get_bits1(&s->gb); + if(s->inter_intra_pred){ + s->h263_aic_dir= get_vlc2(&s->gb, inter_intra_vlc.table, INTER_INTRA_VLC_BITS, 1); +// printf("%d%d %d %d/", s->ac_pred, s->h263_aic_dir, s->mb_x, s->mb_y); + } + if(s->per_mb_rl_table && cbp){ + s->rl_table_index = decode012(&s->gb); + s->rl_chroma_table_index = s->rl_table_index; + } + + for (i = 0; i < 6; i++) { + if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0) + { + fprintf(stderr,"\nerror while decoding intra block: %d x %d (%d)\n", s->mb_x, s->mb_y, i); + return -1; + } + } + } + + return 0; +} + +static int wmv2_decode_init(AVCodecContext *avctx){ + Wmv2Context * const w= avctx->priv_data; + + if(ff_h263_decode_init(avctx) < 0) + return -1; + + wmv2_common_init(w); + + return 0; +} + +AVCodec wmv2_decoder = { + "wmv2", + CODEC_TYPE_VIDEO, + CODEC_ID_WMV2, + sizeof(Wmv2Context), + wmv2_decode_init, + NULL, + ff_h263_decode_end, + ff_h263_decode_frame, + CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1, +}; + +AVCodec wmv2_encoder = { + "wmv2", + CODEC_TYPE_VIDEO, + CODEC_ID_WMV2, + sizeof(Wmv2Context), + wmv2_encode_init, + MPV_encode_picture, + MPV_encode_end, +}; + |