aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2002-12-27 23:51:46 +0000
committer Michael Niedermayer <michaelni@gmx.at> 2002-12-27 23:51:46 +0000
commit 1457ab523343e94e094ad1c60de37077f8dc5589 (patch)
tree 2df86f0b66c5df4c373dec5809a1f62c563df901
parent ac97734133a52c41825e427fd15a66f65a89d4bb (diff)
download ffmpeg-1457ab523343e94e094ad1c60de37077f8dc5589.tar.gz
qpel encoding
4mv+b frames encoding finally fixed
chroma ME
5 comparison functions for ME
b frame encoding speedup
wmv2 codec (unfinished)
user specified diamond size for EPZS

Originally committed as revision 1365 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- libavcodec/allcodecs.c | 4
-rw-r--r-- libavcodec/avcodec.h | 39
-rw-r--r-- libavcodec/dsputil.c | 308
-rw-r--r-- libavcodec/dsputil.h | 35
-rw-r--r-- libavcodec/h263.c | 175
-rw-r--r-- libavcodec/h263dec.c | 67
-rw-r--r-- libavcodec/i386/dsputil_mmx.c | 161
-rw-r--r-- libavcodec/i386/motion_est_mmx.c | 19
-rw-r--r-- libavcodec/motion_est.c | 1149
-rw-r--r-- libavcodec/motion_est_template.c | 737
-rw-r--r-- libavcodec/mpeg12.c | 2
-rw-r--r-- libavcodec/mpegvideo.c | 188
-rw-r--r-- libavcodec/mpegvideo.h | 60
-rw-r--r-- libavcodec/msmpeg4.c | 55
-rw-r--r-- libavcodec/msmpeg4data.h | 241
-rw-r--r-- libavcodec/simple_idct.c | 90
-rw-r--r-- libavcodec/simple_idct.h | 3
-rw-r--r-- libavcodec/wmv2.c | 850
18 files changed, 3213 insertions, 970 deletions
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 286221dbca..a5d2e41cfb 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -53,7 +53,7 @@ void avcodec_register_all(void)
register_avcodec(&msmpeg4v2_encoder);
register_avcodec(&msmpeg4v3_encoder);
register_avcodec(&wmv1_encoder);
-// register_avcodec(&wmv2_encoder);
+ register_avcodec(&wmv2_encoder);
register_avcodec(&huffyuv_encoder);
#endif /* CONFIG_ENCODERS */
register_avcodec(&rawvideo_codec);
@@ -66,7 +66,7 @@ void avcodec_register_all(void)
register_avcodec(&msmpeg4v2_decoder);
register_avcodec(&msmpeg4v3_decoder);
register_avcodec(&wmv1_decoder);
-// register_avcodec(&wmv2_decoder);
+ register_avcodec(&wmv2_decoder);
register_avcodec(&mpeg_decoder);
register_avcodec(&h263i_decoder);
register_avcodec(&rv10_decoder);
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 606cfd814b..a2a1d3428e 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -5,8 +5,8 @@
#define LIBAVCODEC_VERSION_INT 0x000406
#define LIBAVCODEC_VERSION "0.4.6"
-#define LIBAVCODEC_BUILD 4646
-#define LIBAVCODEC_BUILD_STR "4646"
+#define LIBAVCODEC_BUILD 4647
+#define LIBAVCODEC_BUILD_STR "4647"
enum CodecID {
CODEC_ID_NONE,
@@ -850,6 +850,41 @@ typedef struct AVCodecContext {
* decoding: unused
*/
int mb_qmax;
+
+ /**
+ * motion estimation compare function
+ * encoding: set by user.
+ * decoding: unused
+ */
+ int me_cmp;
+ /**
+ * subpixel motion estimation compare function
+ * encoding: set by user.
+ * decoding: unused
+ */
+ int me_sub_cmp;
+ /**
+ * macroblock compare function (not supported yet)
+ * encoding: set by user.
+ * decoding: unused
+ */
+ int mb_cmp;
+#define FF_CMP_SAD 0
+#define FF_CMP_SSE 1
+#define FF_CMP_SATD 2
+#define FF_CMP_DCT 3
+#define FF_CMP_PSNR 4
+#define FF_CMP_BIT 5
+#define FF_CMP_RD 6
+#define FF_CMP_ZERO 7
+#define FF_CMP_CHROMA 256
+
+ /**
+ * ME diamond size
+ * encoding: set by user.
+ * decoding: unused
+ */
+ int dia_size;
} AVCodecContext;
typedef struct AVCodec {
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 1e177116a4..c48c71119b 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -20,6 +20,7 @@
*/
#include "avcodec.h"
#include "dsputil.h"
+#include "mpegvideo.h"
int ff_bit_exact=0;
@@ -144,7 +145,28 @@ static int pix_norm1_c(UINT8 * pix, int line_size)
}
-static int pix_norm_c(UINT8 * pix1, UINT8 * pix2, int line_size)
+static int sse8_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size)
+{
+ int s, i;
+ UINT32 *sq = squareTbl + 256;
+
+ s = 0;
+ for (i = 0; i < 8; i++) {
+ s += sq[pix1[0] - pix2[0]];
+ s += sq[pix1[1] - pix2[1]];
+ s += sq[pix1[2] - pix2[2]];
+ s += sq[pix1[3] - pix2[3]];
+ s += sq[pix1[4] - pix2[4]];
+ s += sq[pix1[5] - pix2[5]];
+ s += sq[pix1[6] - pix2[6]];
+ s += sq[pix1[7] - pix2[7]];
+ pix1 += line_size;
+ pix2 += line_size;
+ }
+ return s;
+}
+
+static int sse16_c(void *v, UINT8 * pix1, UINT8 * pix2, int line_size)
{
int s, i, j;
UINT32 *sq = squareTbl + 256;
@@ -1141,7 +1163,103 @@ QPEL_MC(0, avg_ , _ , op_avg)
#undef op_put
#undef op_put_no_rnd
-static int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
+static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
+ uint8_t *cm = cropTbl + MAX_NEG_CROP;
+ int i;
+
+ for(i=0; i<h; i++){
+ dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
+ dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
+ dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
+ dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
+ dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
+ dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
+ dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
+ dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
+ dst+=dstStride;
+ src+=srcStride;
+ }
+}
+
+static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
+ uint8_t *cm = cropTbl + MAX_NEG_CROP;
+ int i;
+
+ for(i=0; i<w; i++){
+ const int src_1= src[ -srcStride];
+ const int src0 = src[0 ];
+ const int src1 = src[ srcStride];
+ const int src2 = src[2*srcStride];
+ const int src3 = src[3*srcStride];
+ const int src4 = src[4*srcStride];
+ const int src5 = src[5*srcStride];
+ const int src6 = src[6*srcStride];
+ const int src7 = src[7*srcStride];
+ const int src8 = src[8*srcStride];
+ const int src9 = src[9*srcStride];
+ dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
+ dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
+ dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
+ dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
+ dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
+ dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
+ dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
+ dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
+ src++;
+ dst++;
+ }
+}
+
+static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
+ put_pixels8_c(dst, src, stride, 8);
+}
+
+static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
+ uint8_t half[64];
+ wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
+ put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
+}
+
+static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
+ wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
+}
+
+static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
+ uint8_t half[64];
+ wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
+ put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
+}
+
+static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
+ wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
+}
+
+static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
+ uint8_t halfH[88];
+ uint8_t halfV[64];
+ uint8_t halfHV[64];
+ wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+ wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
+ wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
+ put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
+}
+static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
+ uint8_t halfH[88];
+ uint8_t halfV[64];
+ uint8_t halfHV[64];
+ wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+ wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
+ wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
+ put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
+}
+static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
+ uint8_t halfH[88];
+ wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
+ wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
+}
+
+
+static inline int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
int s, i;
@@ -1257,7 +1375,7 @@ static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
return s;
}
-static int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
+static inline int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
{
int s, i;
@@ -1341,6 +1459,14 @@ static int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
return s;
}
+static int sad16x16_c(void *s, uint8_t *a, uint8_t *b, int stride){
+ return pix_abs16x16_c(a,b,stride);
+}
+
+static int sad8x8_c(void *s, uint8_t *a, uint8_t *b, int stride){
+ return pix_abs8x8_c(a,b,stride);
+}
+
void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last)
{
int i;
@@ -1399,6 +1525,156 @@ static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
dst[i+0] = src1[i+0]-src2[i+0];
}
+#define BUTTERFLY2(o1,o2,i1,i2) \
+o1= (i1)+(i2);\
+o2= (i1)-(i2);
+
+#define BUTTERFLY1(x,y) \
+{\
+ int a,b;\
+ a= x;\
+ b= y;\
+ x= a+b;\
+ y= a-b;\
+}
+
+#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
+
+static int hadamard8_diff_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride){
+ int i;
+ int temp[64];
+ int sum=0;
+
+ for(i=0; i<8; i++){
+ //FIXME try pointer walks
+ BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
+ BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
+ BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
+ BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
+
+ BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
+ BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
+ BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
+ BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
+
+ BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
+ BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
+ BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
+ BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
+ }
+
+ for(i=0; i<8; i++){
+ BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
+ BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
+ BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
+ BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
+
+ BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
+ BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
+ BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
+ BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
+
+ sum +=
+ BUTTERFLYA(temp[8*0+i], temp[8*4+i])
+ +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
+ +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
+ +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
+ }
+#if 0
+static int maxi=0;
+if(sum>maxi){
+ maxi=sum;
+ printf("MAX:%d\n", maxi);
+}
+#endif
+ return sum;
+}
+
+static int hadamard8_abs_c(uint8_t *src, int stride, int mean){
+ int i;
+ int temp[64];
+ int sum=0;
+//FIXME OOOPS ignore 0 term instead of mean mess
+ for(i=0; i<8; i++){
+ //FIXME try pointer walks
+ BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-mean,src[stride*i+1]-mean);
+ BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-mean,src[stride*i+3]-mean);
+ BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-mean,src[stride*i+5]-mean);
+ BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-mean,src[stride*i+7]-mean);
+
+ BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
+ BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
+ BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
+ BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
+
+ BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
+ BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
+ BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
+ BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
+ }
+
+ for(i=0; i<8; i++){
+ BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
+ BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
+ BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
+ BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
+
+ BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
+ BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
+ BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
+ BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
+
+ sum +=
+ BUTTERFLYA(temp[8*0+i], temp[8*4+i])
+ +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
+ +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
+ +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
+ }
+
+ return sum;
+}
+
+static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
+ MpegEncContext * const s= (MpegEncContext *)c;
+ DCTELEM temp[64];
+ int sum=0, i;
+
+ s->dsp.diff_pixels(temp, src1, src2, stride);
+ s->fdct(temp);
+
+ for(i=0; i<64; i++)
+ sum+= ABS(temp[i]);
+
+ return sum;
+}
+
+void simple_idct(INT16 *block); //FIXME
+
+static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){
+ MpegEncContext * const s= (MpegEncContext *)c;
+ DCTELEM temp[64], bak[64];
+ int sum=0, i;
+
+ s->mb_intra=0;
+
+ s->dsp.diff_pixels(temp, src1, src2, stride);
+
+ memcpy(bak, temp, 64*sizeof(DCTELEM));
+
+ s->dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
+ s->dct_unquantize(s, temp, 0, s->qscale);
+ simple_idct(temp); //FIXME
+
+ for(i=0; i<64; i++)
+ sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
+
+ return sum;
+}
+
+WARPER88_1616(hadamard8_diff_c, hadamard8_diff16_c)
+WARPER88_1616(dct_sad8x8_c, dct_sad16x16_c)
+WARPER88_1616(quant_psnr8x8_c, quant_psnr16x16_c)
+
void dsputil_init(DSPContext* c, unsigned mask)
{
static int init_done = 0;
@@ -1429,7 +1705,8 @@ void dsputil_init(DSPContext* c, unsigned mask)
c->clear_blocks = clear_blocks_c;
c->pix_sum = pix_sum_c;
c->pix_norm1 = pix_norm1_c;
- c->pix_norm = pix_norm_c;
+ c->sse[0]= sse16_c;
+ c->sse[1]= sse8_c;
/* TODO [0] 16 [1] 8 */
c->pix_abs16x16 = pix_abs16x16_c;
@@ -1489,6 +1766,28 @@ void dsputil_init(DSPContext* c, unsigned mask)
/* dspfunc(avg_no_rnd_qpel, 1, 8); */
#undef dspfunc
+ c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
+ c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
+ c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
+ c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
+ c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
+ c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
+ c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
+ c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
+
+ c->hadamard8_diff[0]= hadamard8_diff16_c;
+ c->hadamard8_diff[1]= hadamard8_diff_c;
+ c->hadamard8_abs = hadamard8_abs_c;
+
+ c->dct_sad[0]= dct_sad16x16_c;
+ c->dct_sad[1]= dct_sad8x8_c;
+
+ c->sad[0]= sad16x16_c;
+ c->sad[1]= sad8x8_c;
+
+ c->quant_psnr[0]= quant_psnr16x16_c;
+ c->quant_psnr[1]= quant_psnr8x8_c;
+
c->add_bytes= add_bytes_c;
c->diff_bytes= diff_bytes_c;
@@ -1516,7 +1815,6 @@ void dsputil_init(DSPContext* c, unsigned mask)
#ifdef HAVE_MMI
dsputil_init_mmi(c, mask);
#endif
-
}
/* remove any non bit exact operation (testing purpose) */
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 29aca1ac22..b2cac91c9c 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -79,13 +79,10 @@ static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
/* motion estimation */
-typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size);
-/*
-int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx);
-int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);
-int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
-int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
-*/
+typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/;
+
+typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size)/* __attribute__ ((const))*/;
+
typedef struct DSPContext {
/* pixel ops : interface with DCT */
void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
@@ -98,7 +95,16 @@ typedef struct DSPContext {
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
int (*pix_sum)(UINT8 * pix, int line_size);
int (*pix_norm1)(UINT8 * pix, int line_size);
- int (*pix_norm)(UINT8 * pix1, UINT8 * pix2, int line_size);
+ me_cmp_func sad[2]; /* identical to pix_absAxA except additional void * */
+ me_cmp_func sse[2];
+ me_cmp_func hadamard8_diff[2];
+ me_cmp_func dct_sad[2];
+ me_cmp_func quant_psnr[2];
+ int (*hadamard8_abs )(uint8_t *src, int stride, int mean);
+
+ me_cmp_func me_cmp[11];
+ me_cmp_func me_sub_cmp[11];
+ me_cmp_func mb_cmp[11];
/* maybe create an array for 16/8 functions */
op_pixels_func put_pixels_tab[2][4];
@@ -109,6 +115,7 @@ typedef struct DSPContext {
qpel_mc_func avg_qpel_pixels_tab[2][16];
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
+ qpel_mc_func put_mspel_pixels_tab[8];
op_pixels_abs_func pix_abs16x16;
op_pixels_abs_func pix_abs16x16_x2;
@@ -120,9 +127,8 @@ typedef struct DSPContext {
op_pixels_abs_func pix_abs8x8_xy2;
/* huffyuv specific */
- //FIXME note: alignment isnt guranteed currently but could be if needed
void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
- void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 16*/,int w);
+ void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
} DSPContext;
void dsputil_init(DSPContext* p, unsigned mask);
@@ -156,6 +162,7 @@ static inline void emms(void)
__asm __volatile ("emms;":::"memory");
}
+
#define emms_c() \
{\
if (mm_flags & MM_MMX)\
@@ -281,6 +288,14 @@ void ff_mdct_calc(MDCTContext *s, FFTSample *out,
const FFTSample *input, FFTSample *tmp);
void ff_mdct_end(MDCTContext *s);
+#define WARPER88_1616(name8, name16)\
+static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride){\
+ return name8(s, dst , src , stride)\
+ +name8(s, dst+8 , src+8 , stride)\
+ +name8(s, dst +8*stride, src +8*stride, stride)\
+ +name8(s, dst+8+8*stride, src+8+8*stride, stride);\
+}
+
#ifndef HAVE_LRINTF
/* XXX: add ISOC specific test to avoid specific BSD testing. */
/* better than nothing implementation. */
diff --git a/libavcodec/h263.c b/libavcodec/h263.c
index bbeea3abd4..239bba8bab 100644
--- a/libavcodec/h263.c
+++ b/libavcodec/h263.c
@@ -204,10 +204,6 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb,1,0); /* Reference Picture Resampling: off */
put_bits(&s->pb,1,0); /* Reduced-Resolution Update: off */
- if (s->pict_type == I_TYPE)
- s->no_rounding = 0;
- else
- s->no_rounding ^= 1;
put_bits(&s->pb,1,s->no_rounding); /* Rounding Type */
put_bits(&s->pb,2,0); /* Reserved */
put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
@@ -392,6 +388,57 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){
}
}
+void ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){
+ const int mb_index= s->mb_x + s->mb_y*s->mb_width;
+ int xy= s->block_index[0];
+ uint16_t time_pp= s->pp_time;
+ uint16_t time_pb= s->pb_time;
+ int i;
+
+ //FIXME avoid divides
+ switch(s->co_located_type_table[mb_index]){
+ case 0:
+ s->mv_type= MV_TYPE_16X16;
+ s->mv[0][0][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
+ s->mv[0][0][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
+ s->mv[1][0][0] = mx ? s->mv[0][0][0] - s->motion_val[xy][0]
+ : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp;
+ s->mv[1][0][1] = my ? s->mv[0][0][1] - s->motion_val[xy][1]
+ : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp;
+ break;
+ case CO_LOCATED_TYPE_4MV:
+ s->mv_type = MV_TYPE_8X8;
+ for(i=0; i<4; i++){
+ xy= s->block_index[i];
+ s->mv[0][i][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
+ s->mv[0][i][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
+ s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->motion_val[xy][0]
+ : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp;
+ s->mv[1][i][1] = my ? s->mv[0][i][1] - s->motion_val[xy][1]
+ : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp;
+ }
+ break;
+ case CO_LOCATED_TYPE_FIELDMV:
+ s->mv_type = MV_TYPE_FIELD;
+ for(i=0; i<2; i++){
+ if(s->top_field_first){
+ time_pp= s->pp_field_time - s->field_select_table[mb_index][i] + i;
+ time_pb= s->pb_field_time - s->field_select_table[mb_index][i] + i;
+ }else{
+ time_pp= s->pp_field_time + s->field_select_table[mb_index][i] - i;
+ time_pb= s->pb_field_time + s->field_select_table[mb_index][i] - i;
+ }
+ s->mv[0][i][0] = s->field_mv_table[mb_index][i][0]*time_pb/time_pp + mx;
+ s->mv[0][i][1] = s->field_mv_table[mb_index][i][1]*time_pb/time_pp + my;
+ s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->field_mv_table[mb_index][i][0]
+ : s->field_mv_table[mb_index][i][0]*(time_pb - time_pp)/time_pp;
+ s->mv[1][i][1] = my ? s->mv[0][i][1] - s->field_mv_table[mb_index][i][1]
+ : s->field_mv_table[mb_index][i][1]*(time_pb - time_pp)/time_pp;
+ }
+ break;
+ }
+}
+
#ifdef CONFIG_ENCODERS
void mpeg4_encode_mb(MpegEncContext * s,
DCTELEM block[6][64],
@@ -442,7 +489,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
return;
}
-
+
if ((cbp | motion_x | motion_y | mb_type) ==0) {
/* direct MB with MV={0,0} */
assert(s->dquant==0);
@@ -1386,7 +1433,7 @@ void h263_encode_init(MpegEncContext *s)
init_mv_penalty_and_fcode(s);
}
- s->mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p
+ s->me.mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p
// use fcodes >1 only for mpeg4 & h263 & h263p FIXME
switch(s->codec_id){
@@ -1519,7 +1566,7 @@ void ff_set_mpeg4_time(MpegEncContext * s, int picture_number){
static void mpeg4_encode_vol_header(MpegEncContext * s)
{
- int vo_ver_id=1; //must be 2 if we want GMC or q-pel
+ int vo_ver_id=2; //must be 2 if we want GMC or q-pel
char buf[255];
if(s->max_b_frames){
@@ -1584,7 +1631,7 @@ static void mpeg4_encode_vol_header(MpegEncContext * s)
if(s->mpeg_quant) put_bits(&s->pb, 2, 0); /* no custom matrixes */
if (vo_ver_id != 1)
- put_bits(&s->pb, 1, s->quarter_sample=0);
+ put_bits(&s->pb, 1, s->quarter_sample);
put_bits(&s->pb, 1, 1); /* complexity estimation disable */
s->resync_marker= s->rtp_mode;
put_bits(&s->pb, 1, s->resync_marker ? 0 : 1);/* resync marker disable */
@@ -1618,7 +1665,6 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
int time_div, time_mod;
if(s->pict_type==I_TYPE){
- s->no_rounding=0;
if(picture_number==0 || !s->strict_std_compliance)
mpeg4_encode_vol_header(s);
}
@@ -1645,7 +1691,6 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb, 1, 1); /* vop coded */
if ( s->pict_type == P_TYPE
|| (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) {
- s->no_rounding ^= 1;
put_bits(&s->pb, 1, s->no_rounding); /* rounding type */
}
put_bits(&s->pb, 3, 0); /* intra dc VLC threshold */
@@ -1996,6 +2041,61 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
}
#endif
}
+
+static inline int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc,
+ UINT8 *scan_table)
+{
+ int i, last_non_zero;
+ const RLTable *rl;
+ UINT8 *len_tab;
+ const int last_index = s->block_last_index[n];
+ int len=0;
+
+ if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
+ /* mpeg4 based DC predictor */
+ //mpeg4_encode_dc(dc_pb, intra_dc, n); //FIXME
+ if(last_index<1) return len;
+ i = 1;
+ rl = &rl_intra;
+ len_tab = uni_mpeg4_intra_rl_len;
+ } else {
+ if(last_index<0) return 0;
+ i = 0;
+ rl = &rl_inter;
+ len_tab = uni_mpeg4_inter_rl_len;
+ }
+
+ /* AC coefs */
+ last_non_zero = i - 1;
+ for (; i < last_index; i++) {
+ int level = block[ scan_table[i] ];
+ if (level) {
+ int run = i - last_non_zero - 1;
+ level+=64;
+ if((level&(~127)) == 0){
+ const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
+ len += len_tab[index];
+ }else{ //ESC3
+ len += 7+2+1+6+1+12+1;
+ }
+ last_non_zero = i;
+ }
+ }
+ /*if(i<=last_index)*/{
+ int level = block[ scan_table[i] ];
+ int run = i - last_non_zero - 1;
+ level+=64;
+ if((level&(~127)) == 0){
+ const int index= UNI_MPEG4_ENC_INDEX(1, run, level);
+ len += len_tab[index];
+ }else{ //ESC3
+ len += 7+2+1+6+1+12+1;
+ }
+ }
+
+ return len;
+}
+
#endif
@@ -3050,8 +3150,6 @@ int ff_h263_decode_mb(MpegEncContext *s,
int modb1; // first bit of modb
int modb2; // second bit of modb
int mb_type;
- uint16_t time_pp;
- uint16_t time_pb;
int xy;
s->mb_intra = 0; //B-frames never contain intra blocks
@@ -3173,9 +3271,6 @@ int ff_h263_decode_mb(MpegEncContext *s,
}
if(mb_type==4 || mb_type==MB_TYPE_B_DIRECT){
- int mb_index= s->mb_x + s->mb_y*s->mb_width;
- int i;
-
if(mb_type==4)
mx=my=0;
else{
@@ -3184,55 +3279,7 @@ int ff_h263_decode_mb(MpegEncContext *s,
}
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
- xy= s->block_index[0];
- time_pp= s->pp_time;
- time_pb= s->pb_time;
-
- //FIXME avoid divides
- switch(s->co_located_type_table[mb_index]){
- case 0:
- s->mv_type= MV_TYPE_16X16;
- s->mv[0][0][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
- s->mv[0][0][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
- s->mv[1][0][0] = mx ? s->mv[0][0][0] - s->motion_val[xy][0]
- : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp;
- s->mv[1][0][1] = my ? s->mv[0][0][1] - s->motion_val[xy][1]
- : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp;
- PRINT_MB_TYPE(mb_type==4 ? "D" : "S");
- break;
- case CO_LOCATED_TYPE_4MV:
- s->mv_type = MV_TYPE_8X8;
- for(i=0; i<4; i++){
- xy= s->block_index[i];
- s->mv[0][i][0] = s->motion_val[xy][0]*time_pb/time_pp + mx;
- s->mv[0][i][1] = s->motion_val[xy][1]*time_pb/time_pp + my;
- s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->motion_val[xy][0]
- : s->motion_val[xy][0]*(time_pb - time_pp)/time_pp;
- s->mv[1][i][1] = my ? s->mv[0][i][1] - s->motion_val[xy][1]
- : s->motion_val[xy][1]*(time_pb - time_pp)/time_pp;
- }
- PRINT_MB_TYPE("4");
- break;
- case CO_LOCATED_TYPE_FIELDMV:
- s->mv_type = MV_TYPE_FIELD;
- for(i=0; i<2; i++){
- if(s->top_field_first){
- time_pp= s->pp_field_time - s->field_select_table[mb_index][i] + i;
- time_pb= s->pb_field_time - s->field_select_table[mb_index][i] + i;
- }else{
- time_pp= s->pp_field_time + s->field_select_table[mb_index][i] - i;
- time_pb= s->pb_field_time + s->field_select_table[mb_index][i] - i;
- }
- s->mv[0][i][0] = s->field_mv_table[mb_index][i][0]*time_pb/time_pp + mx;
- s->mv[0][i][1] = s->field_mv_table[mb_index][i][1]*time_pb/time_pp + my;
- s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->field_mv_table[mb_index][i][0]
- : s->field_mv_table[mb_index][i][0]*(time_pb - time_pp)/time_pp;
- s->mv[1][i][1] = my ? s->mv[0][i][1] - s->field_mv_table[mb_index][i][1]
- : s->field_mv_table[mb_index][i][1]*(time_pb - time_pp)/time_pp;
- }
- PRINT_MB_TYPE("=");
- break;
- }
+ ff_mpeg4_set_direct_mv(s, mx, my);
}
if(mb_type<0 || mb_type>4){
diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c
index e7c49237e9..9a22310c17 100644
--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -40,7 +40,7 @@ static inline long long rdtsc()
}
#endif
-static int h263_decode_init(AVCodecContext *avctx)
+int ff_h263_decode_init(AVCodecContext *avctx)
{
MpegEncContext *s = avctx->priv_data;
@@ -113,7 +113,7 @@ static int h263_decode_init(AVCodecContext *avctx)
return 0;
}
-static int h263_decode_end(AVCodecContext *avctx)
+int ff_h263_decode_end(AVCodecContext *avctx)
{
MpegEncContext *s = avctx->priv_data;
@@ -343,7 +343,7 @@ static int mpeg4_find_frame_end(MpegEncContext *s, UINT8 *buf, int buf_size){
return -1;
}
-static int h263_decode_frame(AVCodecContext *avctx,
+int ff_h263_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
UINT8 *buf, int buf_size)
{
@@ -416,9 +416,11 @@ retry:
if (MPV_common_init(s) < 0) //we need the idct permutaton for reading a custom matrix
return -1;
}
-
+
/* let's go :-) */
- if (s->h263_msmpeg4) {
+ if (s->msmpeg4_version==5) {
+ ret= ff_wmv2_decode_picture_header(s);
+ } else if (s->msmpeg4_version) {
ret = msmpeg4_decode_picture_header(s);
} else if (s->h263_pred) {
if(s->avctx->extradata_size && s->picture_number==0){
@@ -634,7 +636,6 @@ retry:
}
if(num_end_markers || error){
fprintf(stderr, "concealing errors\n");
-//printf("type:%d\n", s->pict_type);
ff_error_resilience(s);
}
}
@@ -713,10 +714,10 @@ AVCodec mpeg4_decoder = {
CODEC_TYPE_VIDEO,
CODEC_ID_MPEG4,
sizeof(MpegEncContext),
- h263_decode_init,
+ ff_h263_decode_init,
NULL,
- h263_decode_end,
- h263_decode_frame,
+ ff_h263_decode_end,
+ ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED,
};
@@ -725,10 +726,10 @@ AVCodec h263_decoder = {
CODEC_TYPE_VIDEO,
CODEC_ID_H263,
sizeof(MpegEncContext),
- h263_decode_init,
+ ff_h263_decode_init,
NULL,
- h263_decode_end,
- h263_decode_frame,
+ ff_h263_decode_end,
+ ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
};
@@ -737,10 +738,10 @@ AVCodec msmpeg4v1_decoder = {
CODEC_TYPE_VIDEO,
CODEC_ID_MSMPEG4V1,
sizeof(MpegEncContext),
- h263_decode_init,
+ ff_h263_decode_init,
NULL,
- h263_decode_end,
- h263_decode_frame,
+ ff_h263_decode_end,
+ ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
};
@@ -749,10 +750,10 @@ AVCodec msmpeg4v2_decoder = {
CODEC_TYPE_VIDEO,
CODEC_ID_MSMPEG4V2,
sizeof(MpegEncContext),
- h263_decode_init,
+ ff_h263_decode_init,
NULL,
- h263_decode_end,
- h263_decode_frame,
+ ff_h263_decode_end,
+ ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
};
@@ -761,10 +762,10 @@ AVCodec msmpeg4v3_decoder = {
CODEC_TYPE_VIDEO,
CODEC_ID_MSMPEG4V3,
sizeof(MpegEncContext),
- h263_decode_init,
+ ff_h263_decode_init,
NULL,
- h263_decode_end,
- h263_decode_frame,
+ ff_h263_decode_end,
+ ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
};
@@ -773,22 +774,10 @@ AVCodec wmv1_decoder = {
CODEC_TYPE_VIDEO,
CODEC_ID_WMV1,
sizeof(MpegEncContext),
- h263_decode_init,
- NULL,
- h263_decode_end,
- h263_decode_frame,
- CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
-};
-
-AVCodec wmv2_decoder = {
- "wmv2",
- CODEC_TYPE_VIDEO,
- CODEC_ID_WMV2,
- sizeof(MpegEncContext),
- h263_decode_init,
+ ff_h263_decode_init,
NULL,
- h263_decode_end,
- h263_decode_frame,
+ ff_h263_decode_end,
+ ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
};
@@ -797,10 +786,10 @@ AVCodec h263i_decoder = {
CODEC_TYPE_VIDEO,
CODEC_ID_H263I,
sizeof(MpegEncContext),
- h263_decode_init,
+ ff_h263_decode_init,
NULL,
- h263_decode_end,
- h263_decode_frame,
+ ff_h263_decode_end,
+ ff_h263_decode_frame,
CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
};
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 12a3601546..b9ebc31136 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -43,6 +43,11 @@ int pix_abs8x8_x2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx);
+int sad16x16_mmx(void *s, UINT8 *blk1, UINT8 *blk2, int lx);
+int sad8x8_mmx(void *s, UINT8 *blk1, UINT8 *blk2, int lx);
+int sad16x16_mmx2(void *s, UINT8 *blk1, UINT8 *blk2, int lx);
+int sad8x8_mmx2(void *s, UINT8 *blk1, UINT8 *blk2, int lx);
+
/* pixel operations */
static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL;
static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
@@ -213,7 +218,7 @@ static void get_pixels_mmx(DCTELEM *block, const UINT8 *pixels, int line_size)
);
}
-static void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride)
+static inline void diff_pixels_mmx(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride)
{
asm volatile(
"pxor %%mm7, %%mm7 \n\t"
@@ -496,7 +501,150 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
for(; i<w; i++)
dst[i+0] = src1[i+0]-src2[i+0];
}
+#define LBUTTERFLY(a,b)\
+ "paddw " #b ", " #a " \n\t"\
+ "paddw " #b ", " #b " \n\t"\
+ "psubw " #a ", " #b " \n\t"
+
+#define HADAMARD48\
+ LBUTTERFLY(%%mm0, %%mm1)\
+ LBUTTERFLY(%%mm2, %%mm3)\
+ LBUTTERFLY(%%mm4, %%mm5)\
+ LBUTTERFLY(%%mm6, %%mm7)\
+ \
+ LBUTTERFLY(%%mm0, %%mm2)\
+ LBUTTERFLY(%%mm1, %%mm3)\
+ LBUTTERFLY(%%mm4, %%mm6)\
+ LBUTTERFLY(%%mm5, %%mm7)\
+ \
+ LBUTTERFLY(%%mm0, %%mm4)\
+ LBUTTERFLY(%%mm1, %%mm5)\
+ LBUTTERFLY(%%mm2, %%mm6)\
+ LBUTTERFLY(%%mm3, %%mm7)
+
+#define MMABS(a,z)\
+ "pxor " #z ", " #z " \n\t"\
+ "pcmpgtw " #a ", " #z " \n\t"\
+ "pxor " #z ", " #a " \n\t"\
+ "psubw " #z ", " #a " \n\t"
+
+#define MMABS_SUM(a,z, sum)\
+ "pxor " #z ", " #z " \n\t"\
+ "pcmpgtw " #a ", " #z " \n\t"\
+ "pxor " #z ", " #a " \n\t"\
+ "psubw " #z ", " #a " \n\t"\
+ "paddusw " #a ", " #sum " \n\t"
+
+
+#define SBUTTERFLY(a,b,t,n)\
+ "movq " #a ", " #t " \n\t" /* abcd */\
+ "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
+ "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\
+
+#define TRANSPOSE4(a,b,c,d,t)\
+ SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\
+ SBUTTERFLY(c,d,b,wd) /* c=imjn b=kolp */\
+ SBUTTERFLY(a,c,d,dq) /* a=aeim d=bfjn */\
+ SBUTTERFLY(t,b,c,dq) /* t=cgko c=dhlp */
+
+#define LOAD4(o, a, b, c, d)\
+ "movq "#o"(%1), " #a " \n\t"\
+ "movq "#o"+16(%1), " #b " \n\t"\
+ "movq "#o"+32(%1), " #c " \n\t"\
+ "movq "#o"+48(%1), " #d " \n\t"
+
+#define STORE4(o, a, b, c, d)\
+ "movq "#a", "#o"(%1) \n\t"\
+ "movq "#b", "#o"+16(%1) \n\t"\
+ "movq "#c", "#o"+32(%1) \n\t"\
+ "movq "#d", "#o"+48(%1) \n\t"\
+
+static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride){
+    uint64_t temp[16] __align8;
+    int sum=0;
+    /* sum of absolute values of the 8x8 transform of the block written by diff_pixels_mmx(); s is not used */
+    diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);
+    asm volatile(
+        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
+        LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7)
+        /* first pass over the quadwords at byte offsets 0,16,..,112 of temp */
+        HADAMARD48
+
+        "movq %%mm7, 112(%1) \n\t"
+        /* mm7 is parked in temp so it can serve as the transpose scratch register */
+        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
+        STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2)
+
+        "movq 112(%1), %%mm7 \n\t"
+        TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
+        STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6)
+        /* same again for the quadwords at byte offsets 8,24,..,120 */
+        LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3)
+        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
+
+        HADAMARD48
+
+        "movq %%mm7, 120(%1) \n\t"
+
+        TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
+        STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2)
+
+        "movq 120(%1), %%mm7 \n\t"
+        TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
+        "movq %%mm7, %%mm5 \n\t"//FIXME remove
+        "movq %%mm6, %%mm7 \n\t"
+        "movq %%mm0, %%mm6 \n\t"
+//        STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove
+        /* second pass: mm4..mm7 are kept in registers instead of being stored and reloaded at offset 72 */
+        LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3)
+//        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
+
+        HADAMARD48
+        "movq %%mm7, 64(%1) \n\t"
+        MMABS(%%mm0, %%mm7)
+        MMABS_SUM(%%mm1, %%mm7, %%mm0)
+        MMABS_SUM(%%mm2, %%mm7, %%mm0)
+        MMABS_SUM(%%mm3, %%mm7, %%mm0)
+        MMABS_SUM(%%mm4, %%mm7, %%mm0)
+        MMABS_SUM(%%mm5, %%mm7, %%mm0)
+        MMABS_SUM(%%mm6, %%mm7, %%mm0)
+        "movq 64(%1), %%mm1 \n\t"
+        MMABS_SUM(%%mm1, %%mm7, %%mm0)
+        "movq %%mm0, 64(%1) \n\t"
+        /* the partial sum of this half waits at 64(%1) while the other half is processed */
+        LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
+        LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7)
+
+        HADAMARD48
+        "movq %%mm7, (%1) \n\t"
+        MMABS(%%mm0, %%mm7)
+        MMABS_SUM(%%mm1, %%mm7, %%mm0)
+        MMABS_SUM(%%mm2, %%mm7, %%mm0)
+        MMABS_SUM(%%mm3, %%mm7, %%mm0)
+        MMABS_SUM(%%mm4, %%mm7, %%mm0)
+        MMABS_SUM(%%mm5, %%mm7, %%mm0)
+        MMABS_SUM(%%mm6, %%mm7, %%mm0)
+        "movq (%1), %%mm1 \n\t"
+        MMABS_SUM(%%mm1, %%mm7, %%mm0)
+        "movq 64(%1), %%mm1 \n\t"
+        MMABS_SUM(%%mm1, %%mm7, %%mm0)
+        /* fold the four 16 bit lanes of mm0 into its low word (saturating adds) */
+        "movq %%mm0, %%mm1 \n\t"
+        "psrlq $32, %%mm0 \n\t"
+        "paddusw %%mm1, %%mm0 \n\t"
+        "movq %%mm0, %%mm1 \n\t"
+        "psrlq $16, %%mm0 \n\t"
+        "paddusw %%mm1, %%mm0 \n\t"
+        "movd %%mm0, %0 \n\t"
+        : "=r" (sum)
+        : "r"(temp)
+        : "memory" /* the asm stores to temp through %1, which is only declared as an input */
+    );
+    return sum&0xFFFF; /* only the low word of the movd result holds the total */
+}
+
+WARPER88_1616(hadamard8_diff_mmx, hadamard8_diff16_mmx)
#if 0
static void just_return() { return; }
@@ -579,7 +727,13 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
c->add_bytes= add_bytes_mmx;
c->diff_bytes= diff_bytes_mmx;
-
+
+ c->hadamard8_diff[0]= hadamard8_diff16_mmx;
+ c->hadamard8_diff[1]= hadamard8_diff_mmx;
+
+ c->sad[0]= sad16x16_mmx;
+ c->sad[1]= sad8x8_mmx;
+
if (mm_flags & MM_MMXEXT) {
c->pix_abs16x16 = pix_abs16x16_mmx2;
c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
@@ -591,6 +745,9 @@ void dsputil_init_mmx(DSPContext* c, unsigned mask)
c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2;
+ c->sad[0]= sad16x16_mmx2;
+ c->sad[1]= sad8x8_mmx2;
+
c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
diff --git a/libavcodec/i386/motion_est_mmx.c b/libavcodec/i386/motion_est_mmx.c
index 3368e73331..fa85db67b6 100644
--- a/libavcodec/i386/motion_est_mmx.c
+++ b/libavcodec/i386/motion_est_mmx.c
@@ -274,6 +274,15 @@ int pix_abs8x8_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
\
return sum_ ## suf();\
}\
+int sad8x8_ ## suf(void *s, UINT8 *blk2, UINT8 *blk1, int stride)\
+{\
+ asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ "pxor %%mm6, %%mm6 \n\t":);\
+\
+ sad8_ ## suf(blk1, blk2, stride, 3);\
+\
+ return sum_ ## suf();\
+}\
\
int pix_abs8x8_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
{\
@@ -324,6 +333,16 @@ int pix_abs16x16_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
\
return sum_ ## suf();\
}\
+int sad16x16_ ## suf(void *s, UINT8 *blk2, UINT8 *blk1, int stride)\
+{\
+ asm volatile("pxor %%mm7, %%mm7 \n\t"\
+ "pxor %%mm6, %%mm6 \n\t":);\
+\
+ sad8_ ## suf(blk1 , blk2 , stride, 4);\
+ sad8_ ## suf(blk1+8, blk2+8, stride, 4);\
+\
+ return sum_ ## suf();\
+}\
int pix_abs16x16_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
{\
asm volatile("pxor %%mm7, %%mm7 \n\t"\
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index 3c688f7e03..ec531c3f3b 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -26,8 +26,10 @@
#include "dsputil.h"
#include "mpegvideo.h"
+//#undef NDEBUG
+//#include <assert.h>
+
#define SQ(a) ((a)*(a))
-#define INTER_BIAS 257
#define P_LAST P[0]
#define P_LEFT P[1]
@@ -40,7 +42,295 @@
#define P_LAST_BOTTOM P[8]
#define P_MV1 P[9]
+static inline int sad_hpel_motion_search(MpegEncContext * s,
+ int *mx_ptr, int *my_ptr, int dmin,
+ int xmin, int ymin, int xmax, int ymax,
+ int pred_x, int pred_y, Picture *picture,
+ int n, int size, uint16_t * const mv_penalty);
+
+static inline int update_map_generation(MpegEncContext * s) /* advance s->me.map_generation by one step and return the new value */
+{
+ s->me.map_generation+= 1<<(ME_MAP_MV_BITS*2); /* step size 1<<(2*ME_MAP_MV_BITS); presumably keeps the generation above the bits of a packed (x,y) key -- confirm against the template */
+ if(s->me.map_generation==0){ /* the counter came back to 0 (presumably wrapped around) */
+ s->me.map_generation= 1<<(ME_MAP_MV_BITS*2); /* restart at the first non-zero generation */
+ memset(s->me.map, 0, sizeof(uint32_t)*ME_MAP_SIZE); /* zero all ME_MAP_SIZE entries of the map */
+ }
+ return s->me.map_generation;
+}
+
+
+
+/* SIMPLE */
+#define RENAME(a) simple_ ## a
+
+#define CMP(d, x, y, size)\
+d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);
+
+#define CMP_HPEL(d, dx, dy, x, y, size)\
+{\
+ const int dxy= (dx) + 2*(dy);\
+ hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\
+ d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
+}
+
+#define CMP_QPEL(d, dx, dy, x, y, size)\
+{\
+ const int dxy= (dx) + 4*(dy);\
+ qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
+ d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
+}
+
+#include "motion_est_template.c"
+#undef RENAME
+#undef CMP
+#undef CMP_HPEL
+#undef CMP_QPEL
+#undef INIT
+
+/* SIMPLE CHROMA */
+#define RENAME(a) simple_chroma_ ## a
+
+#define CMP(d, x, y, size)\
+d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);\
+if(chroma_cmp){\
+ int dxy= ((x)&1) + 2*((y)&1);\
+ int c= ((x)>>1) + ((y)>>1)*uvstride;\
+\
+ chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
+ d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride);\
+ chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
+ d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride);\
+}
+
+#define CMP_HPEL(d, dx, dy, x, y, size)\
+{\
+ const int dxy= (dx) + 2*(dy);\
+ hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\
+ d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
+ if(chroma_cmp_sub){\
+ int cxy= (dxy) | ((x)&1) | (2*((y)&1));\
+ int c= ((x)>>1) + ((y)>>1)*uvstride;\
+ chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
+ d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\
+ chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
+ d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\
+ }\
+}
+
+#define CMP_QPEL(d, dx, dy, x, y, size)\
+{\
+ const int dxy= (dx) + 4*(dy);\
+ qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
+ d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
+ if(chroma_cmp_sub){\
+ int cxy, c;\
+ int cx= (4*(x) + (dx))/2;\
+ int cy= (4*(y) + (dy))/2;\
+ cx= (cx>>1)|(cx&1);\
+ cy= (cy>>1)|(cy&1);\
+ cxy= (cx&1) + 2*(cy&1);\
+ c= ((cx)>>1) + ((cy)>>1)*uvstride;\
+ chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
+ d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\
+ chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
+ d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\
+ }\
+}
+
+#include "motion_est_template.c"
+#undef RENAME
+#undef CMP
+#undef CMP_HPEL
+#undef CMP_QPEL
+#undef INIT
+
+/* SIMPLE DIRECT HPEL */
+#define RENAME(a) simple_direct_hpel_ ## a
+//FIXME precalc divisions stuff
+
+#define CMP_DIRECT(d, dx, dy, x, y, size, cmp_func)\
+if((x) >= xmin && 2*(x) + (dx) <= 2*xmax && (y) >= ymin && 2*(y) + (dy) <= 2*ymax){\
+ const int hx= 2*(x) + (dx);\
+ const int hy= 2*(y) + (dy);\
+ if(s->mv_type==MV_TYPE_8X8){\
+ int i;\
+ for(i=0; i<4; i++){\
+ int fx = s->me.direct_basis_mv[i][0] + hx;\
+ int fy = s->me.direct_basis_mv[i][1] + hy;\
+ int bx = hx ? fx - s->me.co_located_mv[i][0] : s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + (i &1)*16;\
+ int by = hy ? fy - s->me.co_located_mv[i][1] : s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + (i>>1)*16;\
+ int fxy= (fx&1) + 2*(fy&1);\
+ int bxy= (bx&1) + 2*(by&1);\
+\
+ uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
+ hpel_put[1][fxy](dst, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 8);\
+ hpel_avg[1][bxy](dst, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 8);\
+ }\
+ }else{\
+ int fx = s->me.direct_basis_mv[0][0] + hx;\
+ int fy = s->me.direct_basis_mv[0][1] + hy;\
+ int bx = hx ? fx - s->me.co_located_mv[0][0] : s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp;\
+ int by = hy ? fy - s->me.co_located_mv[0][1] : s->me.co_located_mv[0][1]*(time_pb - time_pp)/time_pp;\
+ int fxy= (fx&1) + 2*(fy&1);\
+ int bxy= (bx&1) + 2*(by&1);\
+\
+ hpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 16);\
+ hpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 16);\
+ }\
+ d = cmp_func(s, s->me.scratchpad, src_y, stride);\
+}else\
+ d= 256*256*256*32;
+
+
+#define CMP_HPEL(d, dx, dy, x, y, size)\
+ CMP_DIRECT(d, dx, dy, x, y, size, cmp_sub)
+
+#define CMP(d, x, y, size)\
+ CMP_DIRECT(d, 0, 0, x, y, size, cmp)
+
+#include "motion_est_template.c"
+#undef RENAME
+#undef CMP
+#undef CMP_HPEL
+#undef CMP_QPEL
+#undef INIT
+#undef CMP_DIRECT
+
+/* SIMPLE DIRECT QPEL */
+#define RENAME(a) simple_direct_qpel_ ## a
+
+#define CMP_DIRECT(d, dx, dy, x, y, size, cmp_func)\
+if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*ymax){\
+ const int qx= 4*(x) + (dx);\
+ const int qy= 4*(y) + (dy);\
+ if(s->mv_type==MV_TYPE_8X8){\
+ int i;\
+ for(i=0; i<4; i++){\
+ int fx = s->me.direct_basis_mv[i][0] + qx;\
+ int fy = s->me.direct_basis_mv[i][1] + qy;\
+ int bx = qx ? fx - s->me.co_located_mv[i][0] : s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + (i &1)*16;\
+ int by = qy ? fy - s->me.co_located_mv[i][1] : s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + (i>>1)*16;\
+ int fxy= (fx&3) + 4*(fy&3);\
+ int bxy= (bx&3) + 4*(by&3);\
+\
+ uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
+ qpel_put[1][fxy](dst, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\
+ qpel_avg[1][bxy](dst, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\
+ }\
+ }else{\
+ int fx = s->me.direct_basis_mv[0][0] + qx;\
+ int fy = s->me.direct_basis_mv[0][1] + qy;\
+ int bx = qx ? fx - s->me.co_located_mv[0][0] : s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp;\
+ int by = qy ? fy - s->me.co_located_mv[0][1] : s->me.co_located_mv[0][1]*(time_pb - time_pp)/time_pp;\
+ int fxy= (fx&3) + 4*(fy&3);\
+ int bxy= (bx&3) + 4*(by&3);\
+\
+ qpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\
+ qpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\
+ }\
+ d = cmp_func(s, s->me.scratchpad, src_y, stride);\
+}else\
+ d= 256*256*256*32;
+
+
+#define CMP_QPEL(d, dx, dy, x, y, size)\
+ CMP_DIRECT(d, dx, dy, x, y, size, cmp_sub)
+
+#define CMP(d, x, y, size)\
+ CMP_DIRECT(d, 0, 0, x, y, size, cmp)
+
+#include "motion_est_template.c"
+#undef RENAME
+#undef CMP
+#undef CMP_HPEL
+#undef CMP_QPEL
+#undef INIT
+#undef CMP_DIRECT /* was misspelled CMP__DIRECT, which left the macro defined after this section */
+
+
+static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride){ /* comparison function that ignores its arguments; set_cmp() installs it for FF_CMP_ZERO */
+ return 0;
+}
+
+static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){
+    DSPContext* c= &s->dsp;
+    int i;
+    /* fill cmp[] with the DSPContext comparison functions named by type; entries not set below stay NULL */
+    memset(cmp, 0, sizeof(*cmp)*11); /* NOTE(review): 11 is presumably the length of the me_cmp_func arrays -- confirm in dsputil.h */
+    /* only the low byte of type selects the function; ff_init_me() tests FF_CMP_CHROMA on its own */
+    switch(type&0xFF){
+    case FF_CMP_SAD:
+        cmp[0]= c->sad[0];
+        cmp[1]= c->sad[1];
+        break;
+    case FF_CMP_SATD:
+        cmp[0]= c->hadamard8_diff[0];
+        cmp[1]= c->hadamard8_diff[1];
+        break;
+    case FF_CMP_SSE:
+        cmp[0]= c->sse[0];
+        cmp[1]= c->sse[1];
+        break;
+    case FF_CMP_DCT:
+        cmp[0]= c->dct_sad[0];
+        cmp[1]= c->dct_sad[1];
+        break;
+    case FF_CMP_PSNR:
+        cmp[0]= c->quant_psnr[0];
+        cmp[1]= c->quant_psnr[1];
+        break;
+    case FF_CMP_ZERO:
+        for(i=0; i<7; i++){
+            cmp[i]= zero_cmp;
+        }
+        break;
+    default:
+        fprintf(stderr,"internal error in cmp function selection\n");
+    }
+}
+
+static inline int get_penalty_factor(MpegEncContext *s, int type){
+    /* returns the factor applied to the mv penalty for comparison type; type may carry flag bits (ff_init_me() tests FF_CMP_CHROMA), so select on type&0xFF as set_cmp() does */
+    switch(type&0xFF){
+    default:
+    case FF_CMP_SAD:
+        return s->qscale;
+    case FF_CMP_SSE:
+//        return s->qscale*8;
+    case FF_CMP_DCT:
+    case FF_CMP_SATD:
+        return s->qscale*8;
+    }
+}
+
+void ff_init_me(MpegEncContext *s){ /* install comparison functions and search routines according to avctx->me_cmp / me_sub_cmp / mb_cmp and CODEC_FLAG_QPEL */
+ set_cmp(s, s->dsp.me_cmp, s->avctx->me_cmp);
+ set_cmp(s, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
+ set_cmp(s, s->dsp.mb_cmp, s->avctx->mb_cmp);
+ if(s->flags&CODEC_FLAG_QPEL){ /* sub-pel refinement: quarter-pel variants */
+ if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
+ s->me.sub_motion_search= simple_chroma_qpel_motion_search;
+ else
+ s->me.sub_motion_search= simple_qpel_motion_search;
+ }else{ /* sub-pel refinement: half-pel variants */
+ if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
+ s->me.sub_motion_search= simple_chroma_hpel_motion_search;
+ else if(s->avctx->me_sub_cmp == FF_CMP_SAD && s->avctx->me_cmp == FF_CMP_SAD)
+ s->me.sub_motion_search= sad_hpel_motion_search; /* both stages are plain SAD: use the dedicated SAD half-pel search */
+ else
+ s->me.sub_motion_search= simple_hpel_motion_search;
+ }
+ /* motion_search[0] is called by ff_estimate_p_frame_motion(), motion_search[1] by mv4_search() */
+ if(s->avctx->me_cmp&FF_CMP_CHROMA){
+ s->me.motion_search[0]= simple_chroma_epzs_motion_search;
+ s->me.motion_search[1]= simple_chroma_epzs_motion_search4;
+ }else{
+ s->me.motion_search[0]= simple_epzs_motion_search;
+ s->me.motion_search[1]= simple_epzs_motion_search4;
+ }
+}
+
static int pix_dev(UINT8 * pix, int line_size, int mean)
{
int s, i, j;
@@ -294,492 +584,39 @@ static int phods_motion_search(MpegEncContext * s,
#define Z_THRESHOLD 256
-#define CHECK_MV(x,y)\
-{\
- const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
- const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
- if(map[index]!=key){\
- d = s->dsp.pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
- d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
- COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
- map[index]= key;\
- score_map[index]= d;\
- }\
-}
-
-#define CHECK_MV_DIR(x,y,new_dir)\
-{\
- const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
- const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
- if(map[index]!=key){\
- d = pix_abs(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
- d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
- if(d<dmin){\
- best[0]=x;\
- best[1]=y;\
- dmin=d;\
- next_dir= new_dir;\
- }\
- map[index]= key;\
- score_map[index]= d;\
- }\
-}
-
-#define CHECK_MV4(x,y)\
-{\
- const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
- const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
- if(map[index]!=key){\
- d = s->dsp.pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
- d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
- COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
- map[index]= key;\
- score_map[index]= d;\
- }\
-}
-
-#define check(x,y,S,v)\
-if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
-if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
-if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
-if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
-
-
-static inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
- UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
- int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
- int xmin, int ymin, int xmax, int ymax, int shift,
- uint32_t *map, uint16_t *score_map, int map_generation,
- op_pixels_abs_func pix_abs)
-{
- int next_dir=-1;
-
- for(;;){
- int d;
- const int dir= next_dir;
- const int x= best[0];
- const int y= best[1];
- next_dir=-1;
-
-//printf("%d", dir);
- if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
- if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
- if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
- if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
-
- if(next_dir==-1){
- return dmin;
- }
- }
-
-/* for(;;){
- int d;
- const int x= best[0];
- const int y= best[1];
- const int last_min=dmin;
- if(x>xmin) CHECK_MV(x-1, y )
- if(y>xmin) CHECK_MV(x , y-1)
- if(x<xmax) CHECK_MV(x+1, y )
- if(y<xmax) CHECK_MV(x , y+1)
- if(x>xmin && y>ymin) CHECK_MV(x-1, y-1)
- if(x>xmin && y<ymax) CHECK_MV(x-1, y+1)
- if(x<xmax && y>ymin) CHECK_MV(x+1, y-1)
- if(x<xmax && y<ymax) CHECK_MV(x+1, y+1)
- if(x-1>xmin) CHECK_MV(x-2, y )
- if(y-1>xmin) CHECK_MV(x , y-2)
- if(x+1<xmax) CHECK_MV(x+2, y )
- if(y+1<xmax) CHECK_MV(x , y+2)
- if(x-1>xmin && y-1>ymin) CHECK_MV(x-2, y-2)
- if(x-1>xmin && y+1<ymax) CHECK_MV(x-2, y+2)
- if(x+1<xmax && y-1>ymin) CHECK_MV(x+2, y-2)
- if(x+1<xmax && y+1<ymax) CHECK_MV(x+2, y+2)
- if(dmin==last_min) return dmin;
- }
- */
-}
-
-#if 1
-#define SNAKE_1 3
-#define SNAKE_2 2
-#else
-#define SNAKE_1 7
-#define SNAKE_2 3
-#endif
-static inline int snake_search(MpegEncContext * s, int *best, int dmin,
- UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
- int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
- int xmin, int ymin, int xmax, int ymax, int shift,
- uint32_t *map, uint16_t *score_map,int map_generation,
- op_pixels_abs_func pix_abs)
-{
- int dir=0;
- int c=1;
- static int x_dir[8]= {1,1,0,-1,-1,-1, 0, 1};
- static int y_dir[8]= {0,1,1, 1, 0,-1,-1,-1};
- int fails=0;
- int last_d[2]={dmin, dmin};
-
-/*static int good=0;
-static int bad=0;
-static int point=0;
-
-point++;
-if(256*256*256*64%point==0)
-{
- printf("%d %d %d\n", good, bad, point);
-}*/
-
- for(;;){
- int x= best[0];
- int y= best[1];
- int d;
- x+=x_dir[dir];
- y+=y_dir[dir];
- if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){
- const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;
- const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);
- if(map[index]!=key){
- d = pix_abs(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);
- d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;
- map[index]=key;
- score_map[index]=d;
- }else
- d= dmin+1;
- }else{
- d = dmin + 10000; //FIXME smarter boundary handling
- }
- if(d<dmin){
- best[0]=x;
- best[1]=y;
- dmin=d;
-
- if(last_d[1] - last_d[0] > last_d[0] - d) c= -c;
- dir+=c;
-
- fails=0;
-//good++;
- last_d[1]=last_d[0];
- last_d[0]=d;
- }else{
-//bad++;
- if(fails){
- if(fails>=SNAKE_1+1) return dmin;
- }else{
- if(dir&1) dir-= c*3;
- else c= -c;
-// c= -c;
- }
- dir+=c*SNAKE_2;
- fails++;
- }
- dir&=7;
- }
-}
-
-static inline int cross_search(MpegEncContext * s, int *best, int dmin,
- UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
- int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
- int xmin, int ymin, int xmax, int ymax, int shift,
- uint32_t *map, uint16_t *score_map,int map_generation,
- op_pixels_abs_func pix_abs)
-{
- static int x_dir[4]= {-1, 0, 1, 0};
- static int y_dir[4]= { 0,-1, 0, 1};
- int improvement[2]={100000, 100000};
- int dirs[2]={2, 3};
- int dir;
- int last_dir= -1;
-
- for(;;){
- dir= dirs[ improvement[0] > improvement[1] ? 0 : 1 ];
- if(improvement[dir&1]==-1) return dmin;
-
- {
- const int x= best[0] + x_dir[dir];
- const int y= best[1] + y_dir[dir];
- const int key= (y<<ME_MAP_MV_BITS) + x + map_generation;
- const int index= ((y<<ME_MAP_SHIFT) + x)&(ME_MAP_SIZE-1);
- int d;
- if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){
- if(map[index]!=key){
- d = pix_abs(new_pic, old_pic + x + y*pic_stride, pic_stride);
- d += (mv_penalty[(x<<shift)-pred_x] + mv_penalty[(y<<shift)-pred_y])*quant;
- map[index]=key;
- score_map[index]=d;
- if(d<dmin){
- improvement[dir&1]= dmin-d;
- improvement[(dir&1)^1]++;
- dmin=d;
- best[0]= x;
- best[1]= y;
- last_dir=dir;
- continue;
- }
- }else{
- d= score_map[index];
- }
- }else{
- d= dmin + 1000; //FIXME is this a good idea?
- }
- /* evaluated point was cached or checked and worse */
-
- if(last_dir==dir){
- improvement[dir&1]= -1;
- }else{
- improvement[dir&1]= d-dmin;
- last_dir= dirs[dir&1]= dir^2;
- }
- }
- }
-}
-
-static inline int update_map_generation(MpegEncContext * s)
-{
- s->me_map_generation+= 1<<(ME_MAP_MV_BITS*2);
- if(s->me_map_generation==0){
- s->me_map_generation= 1<<(ME_MAP_MV_BITS*2);
- memset(s->me_map, 0, sizeof(uint32_t)*ME_MAP_SIZE);
- }
- return s->me_map_generation;
-}
-
-static int epzs_motion_search(MpegEncContext * s,
- int *mx_ptr, int *my_ptr,
- int P[10][2], int pred_x, int pred_y,
- int xmin, int ymin, int xmax, int ymax, uint8_t * ref_picture)
-{
- int best[2]={0, 0};
- int d, dmin;
- UINT8 *new_pic, *old_pic;
- const int pic_stride= s->linesize;
- const int pic_xy= (s->mb_y*pic_stride + s->mb_x)*16;
- UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
- int quant= s->qscale; // qscale of the prev frame
- const int shift= 1+s->quarter_sample;
- uint32_t *map= s->me_map;
- uint16_t *score_map= s->me_score_map;
- int map_generation;
-
- new_pic = s->new_picture.data[0] + pic_xy;
- old_pic = ref_picture + pic_xy;
-
- map_generation= update_map_generation(s);
-
- dmin = s->dsp.pix_abs16x16(new_pic, old_pic, pic_stride);
- map[0]= map_generation;
- score_map[0]= dmin;
-
- /* first line */
- if ((s->mb_y == 0 || s->first_slice_line)) {
- CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
- CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
- }else{
- if(dmin<256 && ( P_LEFT[0] |P_LEFT[1]
- |P_TOP[0] |P_TOP[1]
- |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
- *mx_ptr= 0;
- *my_ptr= 0;
- s->skip_me=1;
- return dmin;
- }
- CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
- if(dmin>256*2){
- CHECK_MV(P_LAST[0] >>shift, P_LAST[1] >>shift)
- CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
- CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
- CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
- }
- }
- if(dmin>256*4){
- CHECK_MV(P_LAST_RIGHT[0] >>shift, P_LAST_RIGHT[1] >>shift)
- CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
- }
-#if 0 //doest only slow things down
- if(dmin>512*3){
- int step;
- dmin= score_map[0];
- best[0]= best[1]=0;
- for(step=128; step>0; step>>=1){
- const int step2= step;
- int y;
- for(y=-step2+best[1]; y<=step2+best[1]; y+=step){
- int x;
- if(y<ymin || y>ymax) continue;
-
- for(x=-step2+best[0]; x<=step2+best[0]; x+=step){
- if(x<xmin || x>xmax) continue;
- if(x==best[0] && y==best[1]) continue;
- CHECK_MV(x,y)
- }
- }
- }
- }
-#endif
-//check(best[0],best[1],0, b0)
- if(s->me_method==ME_EPZS)
- dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride,
- pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
- shift, map, score_map, map_generation, s->dsp.pix_abs16x16);
- else
- dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride,
- pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
- shift, map, score_map, map_generation, s->dsp.pix_abs16x16);
-//check(best[0],best[1],0, b1)
- *mx_ptr= best[0];
- *my_ptr= best[1];
-
-// printf("%d %d %d \n", best[0], best[1], dmin);
- return dmin;
-}
-
-static int epzs_motion_search4(MpegEncContext * s, int block,
- int *mx_ptr, int *my_ptr,
- int P[10][2], int pred_x, int pred_y,
- int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
-{
- int best[2]={0, 0};
- int d, dmin;
- UINT8 *new_pic, *old_pic;
- const int pic_stride= s->linesize;
- const int pic_xy= ((s->mb_y*2 + (block>>1))*pic_stride + s->mb_x*2 + (block&1))*8;
- UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
- int quant= s->qscale; // qscale of the prev frame
- const int shift= 1+s->quarter_sample;
- uint32_t *map= s->me_map;
- uint16_t *score_map= s->me_score_map;
- int map_generation;
-
- new_pic = s->new_picture.data[0] + pic_xy;
- old_pic = ref_picture + pic_xy;
-
- map_generation= update_map_generation(s);
-
- dmin = 1000000;
-//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
- /* first line */
- if ((s->mb_y == 0 || s->first_slice_line) && block<2) {
- CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
- CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift)
- CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift)
- }else{
- CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift)
- //FIXME try some early stop
- if(dmin>64*2){
- CHECK_MV4(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
- CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
- CHECK_MV4(P_TOP[0]>>shift, P_TOP[1]>>shift)
- CHECK_MV4(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
- CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift)
- }
- }
- if(dmin>64*4){
- CHECK_MV4(P_LAST_RIGHT[0]>>shift, P_LAST_RIGHT[1]>>shift)
- CHECK_MV4(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
- }
-
- if(s->me_method==ME_EPZS)
- dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride,
- pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
- shift, map, score_map, map_generation, s->dsp.pix_abs8x8);
- else
- dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride,
- pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
- shift, map, score_map, map_generation, s->dsp.pix_abs8x8);
-
- *mx_ptr= best[0];
- *my_ptr= best[1];
-
-// printf("%d %d %d \n", best[0], best[1], dmin);
- return dmin;
-}
-
-#define CHECK_HALF_MV(suffix, x, y) \
+#define CHECK_SAD_HALF_MV(suffix, x, y) \
{\
d= pix_abs_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
- d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\
+ d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*penalty_factor;\
COPY3_IF_LT(dminh, d, dx, x, dy, y)\
}
-
-/* The idea would be to make half pel ME after Inter/Intra decision to
- save time. */
-static inline int halfpel_motion_search(MpegEncContext * s,
+static inline int sad_hpel_motion_search(MpegEncContext * s,
int *mx_ptr, int *my_ptr, int dmin,
int xmin, int ymin, int xmax, int ymax,
- int pred_x, int pred_y, uint8_t *ref_picture,
- op_pixels_abs_func pix_abs_x2,
- op_pixels_abs_func pix_abs_y2, op_pixels_abs_func pix_abs_xy2, int n)
+ int pred_x, int pred_y, Picture *picture,
+ int n, int size, uint16_t * const mv_penalty)
{
- UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
- const int quant= s->qscale;
+ uint8_t *ref_picture= picture->data[0];
+ uint32_t *score_map= s->me.score_map;
+ const int penalty_factor= s->me.sub_penalty_factor;
int mx, my, xx, yy, dminh;
UINT8 *pix, *ptr;
-
- if(s->skip_me){
- *mx_ptr = 0;
- *my_ptr = 0;
- return dmin;
- }
-
- xx = 16 * s->mb_x + 8*(n&1);
- yy = 16 * s->mb_y + 8*(n>>1);
- pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
-
- mx = *mx_ptr;
- my = *my_ptr;
- ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx);
+ op_pixels_abs_func pix_abs_x2;
+ op_pixels_abs_func pix_abs_y2;
+ op_pixels_abs_func pix_abs_xy2;
- dminh = dmin;
-
- if (mx > xmin && mx < xmax &&
- my > ymin && my < ymax) {
- int dx=0, dy=0;
- int d, pen_x, pen_y;
-
- mx<<=1;
- my<<=1;
-
- pen_x= pred_x + mx;
- pen_y= pred_y + my;
-
- ptr-= s->linesize;
- CHECK_HALF_MV(xy2, -1, -1)
- CHECK_HALF_MV(y2 , 0, -1)
- CHECK_HALF_MV(xy2, +1, -1)
-
- ptr+= s->linesize;
- CHECK_HALF_MV(x2 , -1, 0)
- CHECK_HALF_MV(x2 , +1, 0)
- CHECK_HALF_MV(xy2, -1, +1)
- CHECK_HALF_MV(y2 , 0, +1)
- CHECK_HALF_MV(xy2, +1, +1)
-
- mx+=dx;
- my+=dy;
+ if(size==0){
+ pix_abs_x2 = s->dsp.pix_abs16x16_x2;
+ pix_abs_y2 = s->dsp.pix_abs16x16_y2;
+ pix_abs_xy2= s->dsp.pix_abs16x16_xy2;
}else{
- mx<<=1;
- my<<=1;
+ pix_abs_x2 = s->dsp.pix_abs8x8_x2;
+ pix_abs_y2 = s->dsp.pix_abs8x8_y2;
+ pix_abs_xy2= s->dsp.pix_abs8x8_xy2;
}
- *mx_ptr = mx;
- *my_ptr = my;
- return dminh;
-}
-
-static inline int fast_halfpel_motion_search(MpegEncContext * s,
- int *mx_ptr, int *my_ptr, int dmin,
- int xmin, int ymin, int xmax, int ymax,
- int pred_x, int pred_y, uint8_t *ref_picture,
- op_pixels_abs_func pix_abs_x2,
- op_pixels_abs_func pix_abs_y2, op_pixels_abs_func pix_abs_xy2, int n)
-{
- UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
- uint16_t *score_map= s->me_score_map;
- const int quant= s->qscale;
- int mx, my, xx, yy, dminh;
- UINT8 *pix, *ptr;
-
- if(s->skip_me){
+ if(s->me.skip){
// printf("S");
*mx_ptr = 0;
*my_ptr = 0;
@@ -815,51 +652,51 @@ static inline int fast_halfpel_motion_search(MpegEncContext * s,
ptr-= s->linesize;
if(t<=b){
- CHECK_HALF_MV(y2 , 0, -1)
+ CHECK_SAD_HALF_MV(y2 , 0, -1)
if(l<=r){
- CHECK_HALF_MV(xy2, -1, -1)
+ CHECK_SAD_HALF_MV(xy2, -1, -1)
if(t+r<=b+l){
- CHECK_HALF_MV(xy2, +1, -1)
+ CHECK_SAD_HALF_MV(xy2, +1, -1)
ptr+= s->linesize;
}else{
ptr+= s->linesize;
- CHECK_HALF_MV(xy2, -1, +1)
+ CHECK_SAD_HALF_MV(xy2, -1, +1)
}
- CHECK_HALF_MV(x2 , -1, 0)
+ CHECK_SAD_HALF_MV(x2 , -1, 0)
}else{
- CHECK_HALF_MV(xy2, +1, -1)
+ CHECK_SAD_HALF_MV(xy2, +1, -1)
if(t+l<=b+r){
- CHECK_HALF_MV(xy2, -1, -1)
+ CHECK_SAD_HALF_MV(xy2, -1, -1)
ptr+= s->linesize;
}else{
ptr+= s->linesize;
- CHECK_HALF_MV(xy2, +1, +1)
+ CHECK_SAD_HALF_MV(xy2, +1, +1)
}
- CHECK_HALF_MV(x2 , +1, 0)
+ CHECK_SAD_HALF_MV(x2 , +1, 0)
}
}else{
if(l<=r){
if(t+l<=b+r){
- CHECK_HALF_MV(xy2, -1, -1)
+ CHECK_SAD_HALF_MV(xy2, -1, -1)
ptr+= s->linesize;
}else{
ptr+= s->linesize;
- CHECK_HALF_MV(xy2, +1, +1)
+ CHECK_SAD_HALF_MV(xy2, +1, +1)
}
- CHECK_HALF_MV(x2 , -1, 0)
- CHECK_HALF_MV(xy2, -1, +1)
+ CHECK_SAD_HALF_MV(x2 , -1, 0)
+ CHECK_SAD_HALF_MV(xy2, -1, +1)
}else{
if(t+r<=b+l){
- CHECK_HALF_MV(xy2, +1, -1)
+ CHECK_SAD_HALF_MV(xy2, +1, -1)
ptr+= s->linesize;
}else{
ptr+= s->linesize;
- CHECK_HALF_MV(xy2, -1, +1)
+ CHECK_SAD_HALF_MV(xy2, -1, +1)
}
- CHECK_HALF_MV(x2 , +1, 0)
- CHECK_HALF_MV(xy2, +1, +1)
+ CHECK_SAD_HALF_MV(x2 , +1, 0)
+ CHECK_SAD_HALF_MV(xy2, +1, +1)
}
- CHECK_HALF_MV(y2 , 0, +1)
+ CHECK_SAD_HALF_MV(y2 , 0, +1)
}
mx+=dx;
my+=dy;
@@ -933,6 +770,7 @@ static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, in
int P[10][2];
uint8_t *ref_picture= s->last_picture.data[0];
int dmin_sum=0;
+ uint16_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
for(block=0; block<4; block++){
int mx4, my4;
@@ -995,11 +833,11 @@ static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, in
P_MV1[0]= mx;
P_MV1[1]= my;
- dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, ref_picture);
+ dmin4 = s->me.motion_search[1](s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4,
+ &s->last_picture, mv_penalty);
- dmin4= fast_halfpel_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4,
- pred_x4, pred_y4, ref_picture, s->dsp.pix_abs8x8_x2,
- s->dsp.pix_abs8x8_y2, s->dsp.pix_abs8x8_xy2, block);
+ dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4,
+ pred_x4, pred_y4, &s->last_picture, block, 1, mv_penalty);
s->motion_val[ s->block_index[block] ][0]= mx4;
s->motion_val[ s->block_index[block] ][1]= my4;
@@ -1021,13 +859,19 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
int mb_type=0;
uint8_t *ref_picture= s->last_picture.data[0];
Picture * const pic= &s->current_picture;
+ uint16_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
+
+ assert(s->quarter_sample==0 || s->quarter_sample==1);
+
+ s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp);
+ s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, s->f_code);
rel_xmin= xmin - mb_x*16;
rel_xmax= xmax - mb_x*16;
rel_ymin= ymin - mb_y*16;
rel_ymax= ymax - mb_y*16;
- s->skip_me=0;
+ s->me.skip=0;
switch(s->me_method) {
case ME_ZERO:
@@ -1096,7 +940,8 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
}
}
}
- dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture);
+ dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
+ &s->last_picture, mv_penalty);
break;
}
@@ -1112,8 +957,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
sum = s->dsp.pix_sum(pix, s->linesize);
varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
- // FIXME: MMX OPTIMIZE
- vard = (s->dsp.pix_norm(pix, ppix, s->linesize)+128)>>8;
+ vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize)+128)>>8;
//printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
pic->mb_var [s->mb_width * mb_y + mb_x] = varc;
@@ -1137,20 +981,14 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
mb_type|= MB_TYPE_INTRA;
if (varc*2 + 200 > vard){
mb_type|= MB_TYPE_INTER;
- if(s->me_method >= ME_EPZS)
- fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
- pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2,
- s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0);
- else
- halfpel_motion_search( s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
- pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2,
- s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0);
+ s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
+ pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty);
}else{
- mx <<=1;
- my <<=1;
+ mx <<=shift;
+ my <<=shift;
}
if((s->flags&CODEC_FLAG_4MV)
- && !s->skip_me && varc>50 && vard>10){
+ && !s->me.skip && varc>50 && vard>10){
mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
mb_type|=MB_TYPE_INTER4V;
@@ -1159,19 +997,14 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
set_p_mv_tables(s, mx, my, 1);
}else{
if (vard <= 64 || vard < varc) {
+// if (sadP <= 32 || sadP < sadI + 500) {
s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
mb_type|= MB_TYPE_INTER;
if (s->me_method != ME_ZERO) {
- if(s->me_method >= ME_EPZS)
- dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
- pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
- s->dsp.pix_abs16x16_xy2, 0);
- else
- dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
- pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
- s->dsp.pix_abs16x16_xy2, 0);
+ dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
+ pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty);
if((s->flags&CODEC_FLAG_4MV)
- && !s->skip_me && varc>50 && vard>10){
+ && !s->me.skip && varc>50 && vard>10){
int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
if(dmin4 + 128 <dmin)
mb_type= MB_TYPE_INTER4V;
@@ -1179,8 +1012,8 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V);
} else {
- mx <<=1;
- my <<=1;
+ mx <<=shift;
+ my <<=shift;
}
#if 0
if (vard < 10) {
@@ -1201,7 +1034,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
}
int ff_estimate_motion_b(MpegEncContext * s,
- int mb_x, int mb_y, int16_t (*mv_table)[2], uint8_t *ref_picture, int f_code)
+ int mb_x, int mb_y, int16_t (*mv_table)[2], Picture *picture, int f_code)
{
int mx, my, range, dmin;
int xmin, ymin, xmax, ymax;
@@ -1211,7 +1044,12 @@ int ff_estimate_motion_b(MpegEncContext * s,
const int shift= 1+s->quarter_sample;
const int mot_stride = s->mb_width + 2;
const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1;
-
+ uint8_t * const ref_picture= picture->data[0];
+ uint16_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV;
+
+ s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp);
+ s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
+
get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, f_code);
rel_xmin= xmin - mb_x*16;
rel_xmax= xmax - mb_x*16;
@@ -1275,22 +1113,22 @@ int ff_estimate_motion_b(MpegEncContext * s,
pred_x= P_LEFT[0];
pred_y= P_LEFT[1];
}
- dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture);
+ dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
+ picture, mv_penalty);
break;
}
- dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
- pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
- s->dsp.pix_abs16x16_xy2, 0);
+ dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
+ pred_x, pred_y, picture, 0, 0, mv_penalty);
//printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
// s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
mv_table[mot_xy][0]= mx;
mv_table[mot_xy][1]= my;
+
return dmin;
}
-
static inline int check_bidir_mv(MpegEncContext * s,
int mb_x, int mb_y,
int motion_fx, int motion_fy,
@@ -1299,45 +1137,57 @@ static inline int check_bidir_mv(MpegEncContext * s,
int pred_bx, int pred_by)
{
//FIXME optimize?
- //FIXME direct mode penalty
- UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
- uint8_t *dest_y = s->me_scratchpad;
+ //FIXME move into template?
+ //FIXME better f_code prediction (max mv & distance)
+ UINT16 *mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
+ uint8_t *dest_y = s->me.scratchpad;
uint8_t *ptr;
int dxy;
int src_x, src_y;
int fbmin;
- fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->qscale;
-
- dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
- src_x = mb_x * 16 + (motion_fx >> 1);
- src_y = mb_y * 16 + (motion_fy >> 1);
- src_x = clip(src_x, -16, s->width);
- if (src_x == s->width)
- dxy&= 2;
- src_y = clip(src_y, -16, s->height);
- if (src_y == s->height)
- dxy&= 1;
-
- ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
- s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
-
- fbmin += (mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->qscale;
-
- dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
- src_x = mb_x * 16 + (motion_bx >> 1);
- src_y = mb_y * 16 + (motion_by >> 1);
- src_x = clip(src_x, -16, s->width);
- if (src_x == s->width)
- dxy&= 2;
- src_y = clip(src_y, -16, s->height);
- if (src_y == s->height)
- dxy&= 1;
-
- ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
- s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
-
- fbmin += s->dsp.pix_abs16x16(s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize);
+ if(s->quarter_sample){ //quarter-pel vectors: low 2 bits select the sub-pel phase, the rest is the full-pel offset
+ dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
+ src_x = mb_x * 16 + (motion_fx >> 2);
+ src_y = mb_y * 16 + (motion_fy >> 2);
+ assert(src_x >=-16 && src_x<=s->width);
+ assert(src_y >=-16 && src_y<=s->height);
+ //forward prediction from last_picture is written ("put") into the scratchpad
+ ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
+ s->dsp.put_qpel_pixels_tab[0][dxy](dest_y , ptr , s->linesize);
+
+ dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
+ src_x = mb_x * 16 + (motion_bx >> 2);
+ src_y = mb_y * 16 + (motion_by >> 2);
+ assert(src_x >=-16 && src_x<=s->width);
+ assert(src_y >=-16 && src_y<=s->height);
+ //backward prediction from next_picture is combined into the same buffer through the "avg" table
+ ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
+ s->dsp.avg_qpel_pixels_tab[0][dxy](dest_y , ptr , s->linesize);
+ }else{ //half-pel vectors: low bit selects the sub-pel phase
+ dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
+ src_x = mb_x * 16 + (motion_fx >> 1);
+ src_y = mb_y * 16 + (motion_fy >> 1);
+ assert(src_x >=-16 && src_x<=s->width);
+ assert(src_y >=-16 && src_y<=s->height);
+
+ ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
+ s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
+
+ dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
+ src_x = mb_x * 16 + (motion_bx >> 1);
+ src_y = mb_y * 16 + (motion_by >> 1);
+ assert(src_x >=-16 && src_x<=s->width);
+ assert(src_y >=-16 && src_y<=s->height);
+
+ ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
+ s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
+ }
+ //returned cost = MV penalty of the forward vector + MV penalty of the backward vector + me_sub_cmp distortion of dest_y against the source MB; all three terms must form ONE sum
+ fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.sub_penalty_factor
+ +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.sub_penalty_factor
+ + s->dsp.me_sub_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize); //no ';' on the previous line, else this term becomes a discarded expression statement
+
return fbmin;
}
@@ -1374,66 +1224,14 @@ static inline int direct_search(MpegEncContext * s,
int P[10][2];
const int mot_stride = s->mb_width + 2;
const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1;
- int dmin, dmin2;
- int motion_fx, motion_fy, motion_bx, motion_by, motion_bx0, motion_by0;
- int motion_dx, motion_dy;
- const int motion_px= s->p_mv_table[mot_xy][0];
- const int motion_py= s->p_mv_table[mot_xy][1];
+ const int shift= 1+s->quarter_sample;
+ int dmin, i;
const int time_pp= s->pp_time;
const int time_pb= s->pb_time;
- const int time_bp= time_pp - time_pb;
- int bx, by;
- int mx, my, mx2, my2;
- uint8_t *ref_picture= s->me_scratchpad - (mb_x - 1 + (mb_y - 1)*s->linesize)*16;
+ int mx, my, xmin, xmax, ymin, ymax;
int16_t (*mv_table)[2]= s->b_direct_mv_table;
-/* uint16_t *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; */ // f_code of the prev frame
-
- /* thanks to iso-mpeg the rounding is different for the zero vector, so we need to handle that ... */
- motion_fx= (motion_px*time_pb)/time_pp;
- motion_fy= (motion_py*time_pb)/time_pp;
- motion_bx0= (-motion_px*time_bp)/time_pp;
- motion_by0= (-motion_py*time_bp)/time_pp;
- motion_dx= motion_dy=0;
- dmin2= check_bidir_mv(s, mb_x, mb_y,
- motion_fx, motion_fy,
- motion_bx0, motion_by0,
- motion_fx, motion_fy,
- motion_bx0, motion_by0) - s->qscale;
-
- motion_bx= motion_fx - motion_px;
- motion_by= motion_fy - motion_py;
- for(by=-1; by<2; by++){
- for(bx=-1; bx<2; bx++){
- uint8_t *dest_y = s->me_scratchpad + (by+1)*s->linesize*16 + (bx+1)*16;
- uint8_t *ptr;
- int dxy;
- int src_x, src_y;
- const int width= s->width;
- const int height= s->height;
-
- dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
- src_x = (mb_x + bx) * 16 + (motion_fx >> 1);
- src_y = (mb_y + by) * 16 + (motion_fy >> 1);
- src_x = clip(src_x, -16, width);
- if (src_x == width) dxy &= ~1;
- src_y = clip(src_y, -16, height);
- if (src_y == height) dxy &= ~2;
-
- ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
- s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
-
- dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
- src_x = (mb_x + bx) * 16 + (motion_bx >> 1);
- src_y = (mb_y + by) * 16 + (motion_by >> 1);
- src_x = clip(src_x, -16, width);
- if (src_x == width) dxy &= ~1;
- src_y = clip(src_y, -16, height);
- if (src_y == height) dxy &= ~2;
-
- s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
- }
- }
-
+ uint16_t * const mv_penalty= s->me.mv_penalty[1] + MAX_MV;
+
P_LAST[0] = mv_table[mot_xy ][0];
P_LAST[1] = mv_table[mot_xy ][1];
P_LEFT[0] = mv_table[mot_xy - 1][0];
@@ -1458,62 +1256,81 @@ static inline int direct_search(MpegEncContext * s,
P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
}
- dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, -16, -16, 15, 15, ref_picture);
- if(mx==0 && my==0) dmin=99999999; // not representable, due to rounding stuff
- if(dmin2<dmin){
- dmin= dmin2;
- mx=0;
- my=0;
+
+ ymin= xmin=(-32)>>shift;
+ ymax= xmax= 31>>shift;
+
+ if(s->co_located_type_table[mb_x + mb_y*s->mb_width]==CO_LOCATED_TYPE_4MV){
+ s->mv_type= MV_TYPE_8X8;
+ }else{
+ s->mv_type= MV_TYPE_16X16;
}
-#if 1
- mx2= mx= mx*2;
- my2= my= my*2;
- for(by=-1; by<2; by++){
- if(my2+by < -32) continue;
- for(bx=-1; bx<2; bx++){
- if(bx==0 && by==0) continue;
- if(mx2+bx < -32) continue;
- dmin2= check_bidir_mv(s, mb_x, mb_y,
- mx2+bx+motion_fx, my2+by+motion_fy,
- mx2+bx+motion_bx, my2+by+motion_by,
- mx2+bx+motion_fx, my2+by+motion_fy,
- motion_bx, motion_by) - s->qscale;
-
- if(dmin2<dmin){
- dmin=dmin2;
- mx= mx2 + bx;
- my= my2 + by;
- }
- }
+
+ for(i=0; i<4; i++){
+ int index= s->block_index[i];
+ int min, max;
+
+ s->me.co_located_mv[i][0]= s->motion_val[index][0];
+ s->me.co_located_mv[i][1]= s->motion_val[index][1];
+ s->me.direct_basis_mv[i][0]= s->me.co_located_mv[i][0]*time_pb/time_pp + ((i& 1)<<(shift+3));
+ s->me.direct_basis_mv[i][1]= s->me.co_located_mv[i][1]*time_pb/time_pp + ((i>>1)<<(shift+3));
+// s->me.direct_basis_mv[1][i][0]= s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(shift+3);
+// s->me.direct_basis_mv[1][i][1]= s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(shift+3);
+
+ max= FFMAX(s->me.direct_basis_mv[i][0], s->me.direct_basis_mv[i][0] - s->me.co_located_mv[i][0])>>shift;
+ min= FFMIN(s->me.direct_basis_mv[i][0], s->me.direct_basis_mv[i][0] - s->me.co_located_mv[i][0])>>shift;
+ max+= (2*mb_x + (i& 1))*8 - 1; // +-1 is for the simpler rounding
+ min+= (2*mb_x + (i& 1))*8 + 1;
+ if(max >= s->width) xmax= s->width - max - 1;
+ if(min < -16 ) xmin= - 32 - min;
+
+ max= FFMAX(s->me.direct_basis_mv[i][1], s->me.direct_basis_mv[i][1] - s->me.co_located_mv[i][1])>>shift;
+ min= FFMIN(s->me.direct_basis_mv[i][1], s->me.direct_basis_mv[i][1] - s->me.co_located_mv[i][1])>>shift;
+ max+= (2*mb_y + (i>>1))*8 - 1; // +-1 is for the simpler rounding
+ min+= (2*mb_y + (i>>1))*8 + 1;
+ if(max >= s->height) ymax= s->height - max - 1;
+ if(min < -16 ) ymin= - 32 - min;
+
+ if(s->mv_type == MV_TYPE_16X16) break;
}
-#else
- mx*=2; my*=2;
-#endif
- if(mx==0 && my==0){
- motion_bx= motion_bx0;
- motion_by= motion_by0;
+
+ assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);
+
+ if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
+ s->b_direct_mv_table[mot_xy][0]= 0;
+ s->b_direct_mv_table[mot_xy][1]= 0;
+
+ return 256*256*256*64;
+ }
+
+ if(s->flags&CODEC_FLAG_QPEL){
+ dmin = simple_direct_qpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax,
+ &s->last_picture, mv_penalty);
+ dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax,
+ 0, 0, &s->last_picture, 0, 0, mv_penalty);
+ }else{
+ dmin = simple_direct_hpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax,
+ &s->last_picture, mv_penalty);
+ dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax,
+ 0, 0, &s->last_picture, 0, 0, mv_penalty);
}
s->b_direct_mv_table[mot_xy][0]= mx;
s->b_direct_mv_table[mot_xy][1]= my;
- s->b_direct_forw_mv_table[mot_xy][0]= motion_fx + mx;
- s->b_direct_forw_mv_table[mot_xy][1]= motion_fy + my;
- s->b_direct_back_mv_table[mot_xy][0]= motion_bx + mx;
- s->b_direct_back_mv_table[mot_xy][1]= motion_by + my;
return dmin;
}
void ff_estimate_b_frame_motion(MpegEncContext * s,
int mb_x, int mb_y)
{
- const int quant= s->qscale;
+ const int penalty_factor= s->me.penalty_factor;
int fmin, bmin, dmin, fbmin;
int type=0;
dmin= direct_search(s, mb_x, mb_y);
- fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, s->last_picture.data[0], s->f_code);
- bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, s->next_picture.data[0], s->b_code) - quant;
+ fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code);
+ bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, &s->next_picture, s->b_code) - penalty_factor;
//printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
fbmin= bidir_refine(s, mb_x, mb_y);
@@ -1541,22 +1358,10 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
if(s->flags&CODEC_FLAG_HQ){
type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT; //FIXME something smarter
+ if(dmin>256*256*16) type&= ~MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
}
-/*
-{
-static int count=0;
-static int sum=0;
-if(type==MB_TYPE_DIRECT){
- int diff= ABS(s->b_forw_mv_table)
-}
-}*/
-
s->mb_type[mb_y*s->mb_width + mb_x]= type;
-/* if(mb_y==0 && mb_x==0) printf("\n");
- if(mb_x==0) printf("\n");
- printf("%d", av_log2(type));
-*/
}
/* find best f_code for ME which do unlimited searches */
@@ -1569,7 +1374,7 @@ int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
int best_fcode=-1;
int best_score=-10000000;
- for(i=0; i<8; i++) score[i]= s->mb_num*(8-i); //FIXME *2 and all other too so its the same but nicer
+ for(i=0; i<8; i++) score[i]= s->mb_num*(8-i);
for(y=0; y<s->mb_height; y++){
int x;
diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c
new file mode 100644
index 0000000000..45b23ce173
--- /dev/null
+++ b/libavcodec/motion_est_template.c
@@ -0,0 +1,737 @@
+/*
+ * Motion estimation
+ * Copyright (c) 2002 Michael Niedermayer
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+//LOAD_COMMON(x, y): declares the locals shared by the search functions (score_map, strides, src_*/ref_* plane pointers offset to luma position (x,y), put/avg function tables); expects s, ref_picture and size to exist in the enclosing scope
+//Note, the 'unu' variable only references the otherwise possibly unused locals to kill the unused var warnings; lets hope gcc removes the unused vars (gcc 3.2.2 seems to do it)
+#define LOAD_COMMON(x, y)\
+ uint32_t * const score_map= s->me.score_map;\
+ const int stride= s->linesize;\
+ const int uvstride= s->uvlinesize;\
+ const int time_pp= s->pp_time;\
+ const int time_pb= s->pb_time;\
+ uint8_t * const src_y= s->new_picture.data[0] + ((y) * stride) + (x);\
+ uint8_t * const src_u= s->new_picture.data[1] + (((y)>>1) * uvstride) + ((x)>>1);\
+ uint8_t * const src_v= s->new_picture.data[2] + (((y)>>1) * uvstride) + ((x)>>1);\
+ uint8_t * const ref_y= ref_picture->data[0] + ((y) * stride) + (x);\
+ uint8_t * const ref_u= ref_picture->data[1] + (((y)>>1) * uvstride) + ((x)>>1);\
+ uint8_t * const ref_v= ref_picture->data[2] + (((y)>>1) * uvstride) + ((x)>>1);\
+ uint8_t * const ref2_y= s->next_picture.data[0] + ((y) * stride) + (x);\
+ op_pixels_func (*hpel_put)[4];\
+ op_pixels_func (*hpel_avg)[4]= &s->dsp.avg_pixels_tab[size];\
+ op_pixels_func (*chroma_hpel_put)[4];\
+ qpel_mc_func (*qpel_put)[16];\
+ qpel_mc_func (*qpel_avg)[16]= &s->dsp.avg_qpel_pixels_tab[size];\
+ const __attribute__((unused)) int unu= time_pp + time_pb + (int)src_u + (int)src_v + (int)ref_u + (int)ref_v\
+ + (int)ref2_y + (int)hpel_avg + (int)qpel_avg;\
+ if(s->no_rounding /*FIXME b_type*/){\
+ hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];\
+ chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];\
+ qpel_put= &s->dsp.put_no_rnd_qpel_pixels_tab[size];\
+ }else{\
+ hpel_put=& s->dsp.put_pixels_tab[size];\
+ chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];\
+ qpel_put= &s->dsp.put_qpel_pixels_tab[size];\
+ }
+
+
+#ifdef CMP_HPEL
+
+#define CHECK_HALF_MV(dx, dy, x, y) /* scores the half-pel position (2*x+dx, 2*y+dy) with CMP_HPEL, adds the MV penalty and hands d/hx/hy to COPY3_IF_LT (defined elsewhere; presumably keeps them in dmin/bx/by when d<dmin); needs d, dmin, bx, by, size, mv_penalty, pred_x, pred_y, penalty_factor in scope */\
+{\
+ const int hx= 2*(x)+(dx);\
+ const int hy= 2*(y)+(dy);\
+ CMP_HPEL(d, dx, dy, x, y, size);\
+ d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
+ COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
+}
+
+#if 0
+static int RENAME(hpel_motion_search)(MpegEncContext * s, //disabled (#if 0) variant that tries all 8 half-pel neighbours of the full-pel vector
+ int *mx_ptr, int *my_ptr, int dmin,
+ int xmin, int ymin, int xmax, int ymax,
+ int pred_x, int pred_y, Picture *ref_picture,
+ int n, int size) //NOTE(review): no mv_penalty parameter and no penalty_factor local, yet both are used below; would not compile if re-enabled unless the including file provides them
+{
+ UINT8 *ptr;
+
+ const int xx = 16 * s->mb_x + 8*(n&1);
+ const int yy = 16 * s->mb_y + 8*(n>>1);
+ const int mx = *mx_ptr;
+ const int my = *my_ptr;
+
+ LOAD_COMMON(xx, yy);
+
+ // INIT;
+ //FIXME factorize
+ me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub;
+
+ if(s->no_rounding /*FIXME b_type*/){
+ hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];
+ chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
+ }else{
+ hpel_put=& s->dsp.put_pixels_tab[size];
+ chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
+ }
+ cmp= s->dsp.me_cmp[size];
+ chroma_cmp= s->dsp.me_cmp[size+1];
+ cmp_sub= s->dsp.me_sub_cmp[size];
+ chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
+
+ if(s->me.skip){ //FIXME somehow move up (benchmark)
+ *mx_ptr = 0;
+ *my_ptr = 0;
+ return dmin;
+ }
+
+ if(s->avctx->me_cmp != s->avctx->me_sub_cmp){
+ CMP_HPEL(dmin, 0, 0, mx, my, size);
+ if(mx || my)
+ dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
+ }
+
+ if (mx > xmin && mx < xmax &&
+ my > ymin && my < ymax) {
+ int bx=2*mx, by=2*my;
+ int d= dmin;
+ //exhaustive check of the 8 half-pel positions around (mx,my)
+ CHECK_HALF_MV(1, 1, mx-1, my-1)
+ CHECK_HALF_MV(0, 1, mx , my-1)
+ CHECK_HALF_MV(1, 1, mx , my-1)
+ CHECK_HALF_MV(1, 0, mx-1, my )
+ CHECK_HALF_MV(1, 0, mx , my )
+ CHECK_HALF_MV(1, 1, mx-1, my )
+ CHECK_HALF_MV(0, 1, mx , my )
+ CHECK_HALF_MV(1, 1, mx , my )
+
+ assert(bx < xmin*2 || bx > xmax*2 || by < ymin*2 || by > ymax*2); //NOTE(review): this is the negation of the range assert in the live (#else) version; looks inverted
+
+ *mx_ptr = bx;
+ *my_ptr = by;
+ }else{
+ *mx_ptr =2*mx;
+ *my_ptr =2*my;
+ }
+
+ return dmin;
+}
+
+#else
+static int RENAME(hpel_motion_search)(MpegEncContext * s, //half-pel refinement of a full-pel vector; returns the score of the chosen vector
+ int *mx_ptr, int *my_ptr, int dmin, //in: full-pel vector, out: vector in half-pel units; dmin: score of the input vector
+ int xmin, int ymin, int xmax, int ymax, //search window, in full-pel units
+ int pred_x, int pred_y, Picture *ref_picture, //pred_x/pred_y: predictor subtracted from the half-pel vector to index mv_penalty
+ int n, int size, uint16_t * const mv_penalty) //n: 8x8 block index inside the MB (selects the xx/yy offset); size: index into the dsp function tables
+{
+ const int xx = 16 * s->mb_x + 8*(n&1);
+ const int yy = 16 * s->mb_y + 8*(n>>1);
+ const int mx = *mx_ptr;
+ const int my = *my_ptr;
+ const int penalty_factor= s->me.sub_penalty_factor;
+ me_cmp_func cmp_sub, chroma_cmp_sub;
+
+ LOAD_COMMON(xx, yy);
+
+ //FIXME factorize
+
+ cmp_sub= s->dsp.me_sub_cmp[size];
+ chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
+
+ if(s->me.skip){ //FIXME move out of hpel?
+ *mx_ptr = 0;
+ *my_ptr = 0;
+ return dmin;
+ }
+ //if the full-pel and sub-pel comparison functions differ, rescore the full-pel centre with CMP_HPEL before refining
+ if(s->avctx->me_cmp != s->avctx->me_sub_cmp){
+ CMP_HPEL(dmin, 0, 0, mx, my, size);
+ if(mx || my)
+ dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
+ }
+ //refine only when the centre is strictly inside the window; otherwise (else branch) just convert to half-pel units
+ if (mx > xmin && mx < xmax &&
+ my > ymin && my < ymax) {
+ int bx=2*mx, by=2*my;
+ int d= dmin;
+ const int index= (my<<ME_MAP_SHIFT) + mx;
+ const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] //t/l/r/b: cached score_map entries of the top/left/right/bottom full-pel neighbours plus their MV penalty
+ + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*penalty_factor;
+ const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]
+ + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*penalty_factor;
+ const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]
+ + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*penalty_factor;
+ const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
+ + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*penalty_factor;
+ //the neighbour scores decide which subset of the 8 half-pel positions is evaluated (4 in the t<=b half, 5 otherwise)
+ if(t<=b){
+ CHECK_HALF_MV(0, 1, mx ,my-1)
+ if(l<=r){
+ CHECK_HALF_MV(1, 1, mx-1, my-1)
+ if(t+r<=b+l){
+ CHECK_HALF_MV(1, 1, mx , my-1)
+ }else{
+ CHECK_HALF_MV(1, 1, mx-1, my )
+ }
+ CHECK_HALF_MV(1, 0, mx-1, my )
+ }else{
+ CHECK_HALF_MV(1, 1, mx , my-1)
+ if(t+l<=b+r){
+ CHECK_HALF_MV(1, 1, mx-1, my-1)
+ }else{
+ CHECK_HALF_MV(1, 1, mx , my )
+ }
+ CHECK_HALF_MV(1, 0, mx , my )
+ }
+ }else{
+ if(l<=r){
+ if(t+l<=b+r){
+ CHECK_HALF_MV(1, 1, mx-1, my-1)
+ }else{
+ CHECK_HALF_MV(1, 1, mx , my )
+ }
+ CHECK_HALF_MV(1, 0, mx-1, my)
+ CHECK_HALF_MV(1, 1, mx-1, my)
+ }else{
+ if(t+r<=b+l){
+ CHECK_HALF_MV(1, 1, mx , my-1)
+ }else{
+ CHECK_HALF_MV(1, 1, mx-1, my)
+ }
+ CHECK_HALF_MV(1, 0, mx , my)
+ CHECK_HALF_MV(1, 1, mx , my)
+ }
+ CHECK_HALF_MV(0, 1, mx , my)
+ }
+ assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
+
+ *mx_ptr = bx;
+ *my_ptr = by;
+ }else{
+ *mx_ptr =2*mx;
+ *my_ptr =2*my;
+ }
+
+ return dmin;
+}
+#endif
+
+#endif /* CMP_HPEL */
+
+#ifdef CMP_QPEL
+
+#define CHECK_QUARTER_MV(dx, dy, x, y) /* quarter-pel counterpart of CHECK_HALF_MV: scores position (4*x+dx, 4*y+dy) with CMP_QPEL, adds the MV penalty and hands d/hx/hy to COPY3_IF_LT (defined elsewhere; presumably keeps them in dmin/bx/by when d<dmin) */\
+{\
+ const int hx= 4*(x)+(dx);\
+ const int hy= 4*(y)+(dy);\
+ CMP_QPEL(d, dx, dy, x, y, size);\
+ d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
+ COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
+}
+
+static int RENAME(qpel_motion_search)(MpegEncContext * s, //quarter-pel refinement of a full-pel vector; returns the score of the chosen vector
+ int *mx_ptr, int *my_ptr, int dmin, //in: full-pel vector, out: vector in quarter-pel units; dmin: score of the input vector
+ int xmin, int ymin, int xmax, int ymax, //search window, in full-pel units
+ int pred_x, int pred_y, Picture *ref_picture, //pred_x/pred_y: predictor subtracted from the quarter-pel vector to index mv_penalty
+ int n, int size, uint16_t * const mv_penalty) //n: 8x8 block index inside the MB (selects the xx/yy offset); size: index into the dsp function tables
+{
+ const int xx = 16 * s->mb_x + 8*(n&1);
+ const int yy = 16 * s->mb_y + 8*(n>>1);
+ const int mx = *mx_ptr;
+ const int my = *my_ptr;
+ const int penalty_factor= s->me.sub_penalty_factor;
+ const int map_generation= s->me.map_generation;
+ uint32_t *map= s->me.map;
+ me_cmp_func cmp, chroma_cmp;
+ me_cmp_func cmp_sub, chroma_cmp_sub;
+
+ LOAD_COMMON(xx, yy);
+
+ cmp= s->dsp.me_cmp[size];
+ chroma_cmp= s->dsp.me_cmp[size+1]; //factorize FIXME
+ //FIXME factorize
+
+ cmp_sub= s->dsp.me_sub_cmp[size];
+ chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
+
+ if(s->me.skip){ //FIXME somehow move up (benchmark)
+ *mx_ptr = 0;
+ *my_ptr = 0;
+ return dmin;
+ }
+ //if the full-pel and sub-pel comparison functions differ, rescore the full-pel centre with CMP_QPEL before refining
+ if(s->avctx->me_cmp != s->avctx->me_sub_cmp){
+ CMP_QPEL(dmin, 0, 0, mx, my, size);
+ if(mx || my)
+ dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
+ }
+ //refine only when the centre is strictly inside the window; otherwise (else branch) just convert to quarter-pel units
+ if (mx > xmin && mx < xmax &&
+ my > ymin && my < ymax) {
+ int bx=4*mx, by=4*my;
+ int d= dmin;
+ int i, nx, ny;
+ const int index= (my<<ME_MAP_SHIFT) + mx;
+ const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
+ const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)];
+ const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)];
+ const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)];
+ const int c= score_map[(index )&(ME_MAP_SIZE-1)];
+ int best[8]; //the 8 lowest estimated scores, ascending
+ int best_pos[8][2]; //their positions in quarter-pel units
+
+ memset(best, 64, sizeof(int)*8); //every byte becomes 64, so with 32-bit int each entry starts as 0x40404040 (large sentinel)
+#if 1
+ if(s->avctx->dia_size>=2){ //this branch reads the 4 diagonal neighbours straight from score_map
+ const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
+ const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
+ const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
+ const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
+
+ for(ny= -3; ny <= 3; ny++){ //estimate a score for every offset in [-3,3]x[-3,3] except (0,0)
+ for(nx= -3; nx <= 3; nx++){
+ const int t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t; //each row (top/centre/bottom) is modelled as a quadratic in nx ...
+ const int c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c;
+ const int b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
+ int score= ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2; // ... and the three row values are combined by a quadratic in ny
+ int i;
+
+ if((nx&3)==0 && (ny&3)==0) continue;
+
+ score += 1024*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
+
+// if(nx&1) score-=1024*s->me.penalty_factor;
+// if(ny&1) score-=1024*s->me.penalty_factor;
+
+ for(i=0; i<8; i++){ //insert into the sorted best[]/best_pos[] list, shifting worse entries down
+ if(score < best[i]){
+ memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
+ memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
+ best[i]= score;
+ best_pos[i][0]= nx + 4*mx;
+ best_pos[i][1]= ny + 4*my;
+ break;
+ }
+ }
+ }
+ }
+ }else{ //smaller model built from t,l,r,b,c plus the top-left diagonal only
+ int tl;
+ const int cx = 4*(r - l);
+ const int cx2= r + l - 2*c;
+ const int cy = 4*(b - t);
+ const int cy2= b + t - 2*c;
+ int cxy;
+
+ if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
+ tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
+ }else{
+ CMP(tl, mx-1, my-1, size); //FIXME wrong if chroma me is different
+ }
+ //the "&& 0" above disables the cached lookup, so tl is always recomputed with CMP
+ cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
+
+ assert(16*cx2 + 4*cx + 32*c == 32*r);
+ assert(16*cx2 - 4*cx + 32*c == 32*l);
+ assert(16*cy2 + 4*cy + 32*c == 32*b);
+ assert(16*cy2 - 4*cy + 32*c == 32*t);
+ assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
+
+ for(ny= -3; ny <= 3; ny++){
+ for(nx= -3; nx <= 3; nx++){
+ int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
+ int i;
+
+ if((nx&3)==0 && (ny&3)==0) continue;
+
+ score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
+// if(nx&1) score-=32*s->me.penalty_factor;
+ // if(ny&1) score-=32*s->me.penalty_factor;
+
+ for(i=0; i<8; i++){
+ if(score < best[i]){
+ memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
+ memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
+ best[i]= score;
+ best_pos[i][0]= nx + 4*mx;
+ best_pos[i][1]= ny + 4*my;
+ break;
+ }
+ }
+ }
+ }
+ }
+ for(i=0; i<8; i++){ //only the 8 best-estimated positions are actually evaluated with CMP_QPEL
+ nx= best_pos[i][0];
+ ny= best_pos[i][1];
+ CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
+ }
+#if 0
+ nx= FFMAX(4*mx - bx, bx - 4*mx);
+ ny= FFMAX(4*my - by, by - 4*my);
+
+ static int stats[4][4];
+ stats[nx][ny]++;
+ if(256*256*256*64 % (stats[0][0]+1) ==0){
+ for(i=0; i<16; i++){
+ if((i&3)==0) printf("\n");
+ printf("%6d ", stats[0][i]);
+ }
+ printf("\n");
+ }
+#endif
+#else
+
+ CHECK_QUARTER_MV(2, 2, mx-1, my-1)
+ CHECK_QUARTER_MV(0, 2, mx , my-1)
+ CHECK_QUARTER_MV(2, 2, mx , my-1)
+ CHECK_QUARTER_MV(2, 0, mx , my )
+ CHECK_QUARTER_MV(2, 2, mx , my )
+ CHECK_QUARTER_MV(0, 2, mx , my )
+ CHECK_QUARTER_MV(2, 2, mx-1, my )
+ CHECK_QUARTER_MV(2, 0, mx-1, my )
+
+ nx= bx;
+ ny= by;
+
+ for(i=0; i<8; i++){
+ int ox[8]= {0, 1, 1, 1, 0,-1,-1,-1};
+ int oy[8]= {1, 1, 0,-1,-1,-1, 0, 1};
+ CHECK_QUARTER_MV((nx + ox[i])&3, (ny + oy[i])&3, (nx + ox[i])>>2, (ny + oy[i])>>2)
+ }
+#endif
+#if 0
+ //outer ring
+ CHECK_QUARTER_MV(1, 3, mx-1, my-1)
+ CHECK_QUARTER_MV(1, 2, mx-1, my-1)
+ CHECK_QUARTER_MV(1, 1, mx-1, my-1)
+ CHECK_QUARTER_MV(2, 1, mx-1, my-1)
+ CHECK_QUARTER_MV(3, 1, mx-1, my-1)
+ CHECK_QUARTER_MV(0, 1, mx , my-1)
+ CHECK_QUARTER_MV(1, 1, mx , my-1)
+ CHECK_QUARTER_MV(2, 1, mx , my-1)
+ CHECK_QUARTER_MV(3, 1, mx , my-1)
+ CHECK_QUARTER_MV(3, 2, mx , my-1)
+ CHECK_QUARTER_MV(3, 3, mx , my-1)
+ CHECK_QUARTER_MV(3, 0, mx , my )
+ CHECK_QUARTER_MV(3, 1, mx , my )
+ CHECK_QUARTER_MV(3, 2, mx , my )
+ CHECK_QUARTER_MV(3, 3, mx , my )
+ CHECK_QUARTER_MV(2, 3, mx , my )
+ CHECK_QUARTER_MV(1, 3, mx , my )
+ CHECK_QUARTER_MV(0, 3, mx , my )
+ CHECK_QUARTER_MV(3, 3, mx-1, my )
+ CHECK_QUARTER_MV(2, 3, mx-1, my )
+ CHECK_QUARTER_MV(1, 3, mx-1, my )
+ CHECK_QUARTER_MV(1, 2, mx-1, my )
+ CHECK_QUARTER_MV(1, 1, mx-1, my )
+ CHECK_QUARTER_MV(1, 0, mx-1, my )
+#endif
+ assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
+
+ *mx_ptr = bx;
+ *my_ptr = by;
+ }else{
+ *mx_ptr =4*mx;
+ *my_ptr =4*my;
+ }
+
+ return dmin;
+}
+
+#endif /* CMP_QPEL */
+
+#define CHECK_MV(x,y) /* evaluates full-pel position (x,y) unless map[] already holds its key for this map_generation; stores the raw CMP score in score_map, then adds the MV penalty and hands d/x/y to COPY3_IF_LT (defined elsewhere; presumably keeps them in dmin/best[] when d<dmin) */\
+{\
+ const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
+ const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
+ if(map[index]!=key){\
+ CMP(d, x, y, size);\
+ map[index]= key;\
+ score_map[index]= d;\
+ d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
+ COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
+ }\
+}
+
+#define CHECK_MV_DIR(x,y,new_dir) /* same as CHECK_MV, but on improvement it also records new_dir in next_dir so the caller knows which way the search moved */\
+{\
+ const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
+ const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
+ if(map[index]!=key){\
+ CMP(d, x, y, size);\
+ map[index]= key;\
+ score_map[index]= d;\
+ d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
+ if(d<dmin){\
+ best[0]=x;\
+ best[1]=y;\
+ dmin=d;\
+ next_dir= new_dir;\
+ }\
+ }\
+}
+
+#define check(x,y,S,v) /* debug helper: prints a diagnostic when (x,y) lies outside the window [xmin,xmax]x[ymin,ymax] scaled by <<S; v is a tag stringified into the message */\
+if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
+if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
+if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
+if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
+
+/*
+ * Iterative refinement with a radius-1 diamond.
+ *
+ * Starting from best[], the four direct neighbours (x-1, y-1, x+1, y+1) are
+ * tested with CHECK_MV_DIR; whenever one of them beats dmin, best[] moves
+ * there.  The loop repeats from the new position and stops as soon as a full
+ * pass brings no improvement.
+ *
+ * s                  context; s->dsp.me_cmp[], s->mb_x and s->mb_y are read here
+ * best               in/out: best[0] = x, best[1] = y of the best candidate
+ * dmin               score belonging to best[] on entry
+ * ref_picture        not referenced directly in this body; NOTE(review):
+ *                    presumably consumed by LOAD_COMMON/CMP -- confirm
+ * pred_x, pred_y,
+ * penalty_factor,
+ * shift, mv_penalty  used by CHECK_MV_DIR to add the vector cost to a score
+ * xmin..ymax         inclusive limits for candidate positions
+ * map,
+ * map_generation     cache used by CHECK_MV_DIR to skip repeated candidates
+ * size               index into s->dsp.me_cmp[] (size+1 selects chroma_cmp)
+ *
+ * Returns the best score found; best[] holds the matching position.
+ */
+static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, int dmin,
+ Picture *ref_picture,
+ int const pred_x, int const pred_y, int const penalty_factor,
+ int const xmin, int const ymin, int const xmax, int const ymax, int const shift,
+ uint32_t *map, int map_generation, int size, uint16_t * const mv_penalty
+ )
+{
+ me_cmp_func cmp, chroma_cmp;
+ int next_dir=-1; /* direction of the last improving step: 0 = x-1, 1 = y-1, 2 = x+1, 3 = y+1, -1 = none */
+ LOAD_COMMON(s->mb_x*16, s->mb_y*16); /* macro defined elsewhere; NOTE(review): presumably declares the locals (e.g. score_map) that CMP/CHECK_MV_DIR rely on -- confirm */
+ /* comparison functions for this block size; NOTE(review): not used directly below, presumably picked up by CMP() -- confirm */
+ cmp= s->dsp.me_cmp[size];
+ chroma_cmp= s->dsp.me_cmp[size+1];
+
+ for(;;){
+ int d; /* scratch score written by CHECK_MV_DIR */
+ const int dir= next_dir; /* step that led to the current position */
+ const int x= best[0]; /* snapshot: all four neighbours are relative to this position, */
+ const int y= best[1]; /* even if best[] changes during the pass */
+ next_dir=-1;
+ /* the neighbour opposite to the last step is the position we came from, so it is not tested again */
+//printf("%d", dir);
+ if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
+ if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
+ if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
+ if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
+ /* no neighbour improved on dmin: the current position is the result */
+ if(next_dir==-1){
+ return dmin;
+ }
+ }
+}
+/*
+ * Diamond search with a growing radius.
+ *
+ * For dia_size = 1 .. s->avctx->dia_size the points with
+ * |dx| + |dy| == dia_size around the current best position are tested with
+ * CHECK_MV.  The perimeter is walked as four edges; every edge loop is
+ * clipped through start/end so that no tested point lies outside
+ * [xmin, xmax] x [ymin, ymax].  When a pass has moved best[], the radius
+ * restarts at 1 around the new position.
+ *
+ * s                  context; s->avctx->dia_size, s->dsp.me_cmp[], s->mb_x and
+ *                    s->mb_y are read here
+ * best               in/out: best[0] = x, best[1] = y of the best candidate
+ * dmin               score belonging to best[] on entry
+ * ref_picture        not referenced directly in this body; NOTE(review):
+ *                    presumably consumed by LOAD_COMMON/CMP -- confirm
+ * pred_x, pred_y,
+ * penalty_factor,
+ * shift, mv_penalty  used by CHECK_MV to add the vector cost to a score
+ * xmin..ymax         limits for candidate positions
+ * map,
+ * map_generation     cache used by CHECK_MV to skip repeated candidates
+ * size               index into s->dsp.me_cmp[] (size+1 selects chroma_cmp)
+ *
+ * Returns the best score found; best[] holds the matching position.
+ */
+static inline int RENAME(var_diamond_search)(MpegEncContext * s, int *best, int dmin,
+ Picture *ref_picture,
+ int const pred_x, int const pred_y, int const penalty_factor,
+ int const xmin, int const ymin, int const xmax, int const ymax, int const shift,
+ uint32_t *map, int map_generation, int size, uint16_t * const mv_penalty
+ )
+{
+ me_cmp_func cmp, chroma_cmp;
+ int dia_size=1;
+ LOAD_COMMON(s->mb_x*16, s->mb_y*16); /* macro defined elsewhere; NOTE(review): presumably declares the locals (e.g. score_map) that CMP/CHECK_MV rely on -- confirm */
+ /* comparison functions for this block size; NOTE(review): not used directly below, presumably picked up by CMP() -- confirm */
+ cmp= s->dsp.me_cmp[size];
+ chroma_cmp= s->dsp.me_cmp[size+1];
+
+ for(dia_size=1; dia_size<=s->avctx->dia_size; dia_size++){
+ int dir, start, end;
+ const int x= best[0]; /* centre of this pass; stays fixed even if best[] changes below */
+ const int y= best[1];
+ /* edge 1: from (x, y+dia_size) towards (x+dia_size, y) */
+ start= FFMAX(0, y + dia_size - ymax);
+ end = FFMIN(dia_size, xmax - x);
+ for(dir= start; dir<end; dir++){
+ int d;
+
+//check(x + dir,y + dia_size - dir,0, a0)
+ CHECK_MV(x + dir , y + dia_size - dir);
+ }
+ /* edge 2: from (x+dia_size, y) towards (x, y-dia_size) */
+ start= FFMAX(0, x + dia_size - xmax);
+ end = FFMIN(dia_size, y - ymin);
+ for(dir= start; dir<end; dir++){
+ int d;
+
+//check(x + dia_size - dir, y - dir,0, a1)
+ CHECK_MV(x + dia_size - dir, y - dir );
+ }
+ /* edge 3: from (x, y-dia_size) towards (x-dia_size, y) */
+ start= FFMAX(0, -y + dia_size + ymin );
+ end = FFMIN(dia_size, x - xmin);
+ for(dir= start; dir<end; dir++){
+ int d;
+
+//check(x - dir,y - dia_size + dir,0, a2)
+ CHECK_MV(x - dir , y - dia_size + dir);
+ }
+ /* edge 4: from (x-dia_size, y) towards (x, y+dia_size) */
+ start= FFMAX(0, -x + dia_size + xmin );
+ end = FFMIN(dia_size, ymax - y);
+ for(dir= start; dir<end; dir++){
+ int d;
+
+//check(x - dia_size + dir, y + dir,0, a3)
+ CHECK_MV(x - dia_size + dir, y + dir );
+ }
+ /* best[] moved: set the radius to 0 so that the loop increment restarts it at 1 */
+ if(x!=best[0] || y!=best[1])
+ dia_size=0;
+ }
+ return dmin;
+}
+/*
+ * Predictor based motion search for a whole macroblock (size index 0,
+ * position s->mb_x*16, s->mb_y*16).
+ *
+ * The zero vector is scored first, then a set of predictor vectors (the P_*
+ * macros, scaled down with >>shift) is tried with CHECK_MV; how many of them
+ * are tried depends on the score reached so far.  The best candidate is then
+ * refined with small_diamond_search (s->avctx->dia_size < 2) or
+ * var_diamond_search.
+ *
+ * s             context; s->me.map, s->me.penalty_factor, s->quarter_sample,
+ *               s->first_slice_line and s->avctx->dia_size are read here
+ * block         not referenced directly in this body
+ * mx_ptr,
+ * my_ptr        out: the vector found, in the units of best[] (not shifted
+ *               back by shift)
+ * P             NOTE(review): presumably the array behind the P_* predictor
+ *               macros, which are defined elsewhere -- confirm
+ * pred_x,
+ * pred_y,
+ * mv_penalty    used by CHECK_MV to add the vector cost to a score
+ * xmin..ymax    limits handed on to the diamond search
+ * ref_picture   handed on to the diamond search
+ *
+ * Returns the best score.  On the early exit below the vector is set to
+ * (0, 0) and s->me.skip is set to 1.
+ */
+static int RENAME(epzs_motion_search)(MpegEncContext * s, int block,
+ int *mx_ptr, int *my_ptr,
+ int P[10][2], int pred_x, int pred_y,
+ int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, uint16_t * const mv_penalty)
+{
+ int best[2]={0, 0};
+ int d, dmin; /* d: scratch score written by CHECK_MV */
+ const int shift= 1+s->quarter_sample; /* predictors and vector costs use units that are 1<<shift times finer than the search grid */
+ uint32_t *map= s->me.map;
+ int map_generation;
+ const int penalty_factor= s->me.penalty_factor;
+ const int size=0;
+ me_cmp_func cmp, chroma_cmp;
+ LOAD_COMMON(s->mb_x*16, s->mb_y*16); /* macro defined elsewhere; NOTE(review): presumably declares the locals (e.g. score_map) used below -- confirm */
+ /* comparison functions for this block size; NOTE(review): presumably picked up by CMP() -- confirm */
+ cmp= s->dsp.me_cmp[size];
+ chroma_cmp= s->dsp.me_cmp[size+1];
+
+ map_generation= update_map_generation(s);
+ /* score the zero vector (no vector cost is added here) and enter it into the cache; map_generation alone is the key CHECK_MV computes for (0,0) */
+ CMP(dmin, 0, 0, size);
+ map[0]= map_generation;
+ score_map[0]= dmin;
+
+ /* first line */
+ if ((s->mb_y == 0 || s->first_slice_line)) {
+ CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
+ CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
+ }else{
+ if(dmin<256 && ( P_LEFT[0] |P_LEFT[1]
+ |P_TOP[0] |P_TOP[1]
+ |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0 && s->avctx->dia_size==0){ /* early exit: low zero-vector score, all three predictors zero, dia_size 0 */
+ *mx_ptr= 0;
+ *my_ptr= 0;
+ s->me.skip=1;
+ return dmin;
+ }
+ CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
+ if(dmin>256*2){ /* still a high score: try the remaining predictors */
+ CHECK_MV(P_LAST[0] >>shift, P_LAST[1] >>shift)
+ CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
+ CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
+ CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
+ }
+ }
+ if(dmin>256*4){ /* applies to both branches above */
+ CHECK_MV(P_LAST_RIGHT[0] >>shift, P_LAST_RIGHT[1] >>shift)
+ CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
+ }
+#if 0 //does only slow things down
+ if(dmin>512*3){
+ int step;
+ dmin= score_map[0];
+ best[0]= best[1]=0;
+ for(step=128; step>0; step>>=1){
+ const int step2= step;
+ int y;
+ for(y=-step2+best[1]; y<=step2+best[1]; y+=step){
+ int x;
+ if(y<ymin || y>ymax) continue;
+
+ for(x=-step2+best[0]; x<=step2+best[0]; x+=step){
+ if(x<xmin || x>xmax) continue;
+ if(x==best[0] && y==best[1]) continue;
+ CHECK_MV(x,y)
+ }
+ }
+ }
+ }
+#endif
+//check(best[0],best[1],0, b0)
+ if(s->avctx->dia_size<2) /* refine the best candidate found so far */
+ dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture,
+ pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
+ shift, map, map_generation, size, mv_penalty);
+ else
+ dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture,
+ pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
+ shift, map, map_generation, size, mv_penalty);
+
+//check(best[0],best[1],0, b1)
+ *mx_ptr= best[0];
+ *my_ptr= best[1];
+
+// printf("%d %d %d \n", best[0], best[1], dmin);
+ return dmin;
+}
+/*
+ * Predictor based motion search for one 8x8 block of a macroblock (size
+ * index 1).  Only compiled when CMP_DIRECT is not defined.
+ *
+ * block selects the quadrant: bit 0 adds 8 to the x position and block>>1
+ * adds 8 to the y position relative to the macroblock origin.  Unlike the
+ * whole-macroblock search, the zero vector is not scored up front; dmin
+ * starts at 1000000 and the predictor vectors (the P_* macros, scaled down
+ * with >>shift) are tried with CHECK_MV.  The best candidate is then refined
+ * with small_diamond_search (s->avctx->dia_size < 2) or var_diamond_search.
+ *
+ * s             context; s->me.map, s->me.penalty_factor, s->quarter_sample,
+ *               s->first_slice_line and s->avctx->dia_size are read here
+ * block         index of the 8x8 block inside the macroblock
+ * mx_ptr,
+ * my_ptr        out: the vector found, in the units of best[] (not shifted
+ *               back by shift)
+ * P             NOTE(review): presumably the array behind the P_* predictor
+ *               macros, which are defined elsewhere -- confirm
+ * pred_x,
+ * pred_y,
+ * mv_penalty    used by CHECK_MV to add the vector cost to a score
+ * xmin..ymax    limits handed on to the diamond search
+ * ref_picture   handed on to the diamond search
+ *
+ * Returns the best score.
+ */
+#ifndef CMP_DIRECT /* no 4mv search needed in direct mode */
+static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block,
+ int *mx_ptr, int *my_ptr,
+ int P[10][2], int pred_x, int pred_y,
+ int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, uint16_t * const mv_penalty)
+{
+ int best[2]={0, 0};
+ int d, dmin; /* d: scratch score written by CHECK_MV */
+ const int shift= 1+s->quarter_sample; /* predictors and vector costs use units that are 1<<shift times finer than the search grid */
+ uint32_t *map= s->me.map;
+ int map_generation;
+ const int penalty_factor= s->me.penalty_factor;
+ const int size=1;
+ me_cmp_func cmp, chroma_cmp;
+ LOAD_COMMON((s->mb_x*2 + (block&1))*8, (s->mb_y*2 + (block>>1))*8); /* pixel position of this 8x8 block; macro defined elsewhere */
+ /* comparison functions for this block size; NOTE(review): presumably picked up by CMP() -- confirm */
+ cmp= s->dsp.me_cmp[size];
+ chroma_cmp= s->dsp.me_cmp[size+1];
+
+ map_generation= update_map_generation(s);
+
+ dmin = 1000000; /* start value that the first scored candidate is expected to beat */
+//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
+ /* first line */
+ if ((s->mb_y == 0 || s->first_slice_line) && block<2) { /* block<2: the two upper 8x8 blocks */
+ CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
+ CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
+ CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
+ }else{
+ CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
+ //FIXME try some early stop
+ if(dmin>64*2){ /* still a high score: try the remaining predictors */
+ CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
+ CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
+ CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
+ CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
+ CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
+ }
+ }
+ if(dmin>64*4){ /* applies to both branches above */
+ CHECK_MV(P_LAST_RIGHT[0]>>shift, P_LAST_RIGHT[1]>>shift)
+ CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
+ }
+ /* refine the best candidate found so far */
+ if(s->avctx->dia_size<2)
+ dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture,
+ pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
+ shift, map, map_generation, size, mv_penalty);
+ else
+ dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture,
+ pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax,
+ shift, map, map_generation, size, mv_penalty);
+ *mx_ptr= best[0];
+ *my_ptr= best[1];
+
+// printf("%d %d %d \n", best[0], best[1], dmin);
+ return dmin;
+}
+#endif /* !CMP_DIRECT */
diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index 17a9aec20a..5dc3e6218e 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -526,7 +526,7 @@ void ff_mpeg1_encode_init(MpegEncContext *s)
}
}
}
- s->mv_penalty= mv_penalty;
+ s->me.mv_penalty= mv_penalty;
s->fcode_tab= fcode_tab;
s->min_qcoeff=-255;
s->max_qcoeff= 255;
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index a7808e107b..7c5cf59c87 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -43,8 +43,6 @@ static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w);
static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c;
-static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h,
- int src_x, int src_y, int w, int h);
/* enable all paranoid tests for rounding, overflows, etc... */
@@ -64,8 +62,8 @@ static const uint16_t aanscales[64] = {
19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
- 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
- 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
+ 8867 , 12299, 11585, 10426, 8867, 6967, 4799, 2446,
+ 4520 , 6270, 5906, 5315, 4520, 3552, 2446, 1247
};
/* Input permutation for the simple_idct_mmx */
@@ -87,9 +85,6 @@ static const uint8_t h263_chroma_roundtab[16] = {
static UINT16 (*default_mv_penalty)[MAX_MV*2+1]=NULL;
static UINT8 default_fcode_tab[MAX_MV*2+1];
-/* default motion estimation */
-int motion_estimation_method = ME_EPZS;
-
static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
const UINT16 *quant_matrix, int bias, int qmin, int qmax)
{
@@ -394,15 +389,13 @@ int MPV_common_init(MpegEncContext *s)
CHECKED_ALLOCZ(s->b_back_mv_table , mv_table_size * 2 * sizeof(INT16))
CHECKED_ALLOCZ(s->b_bidir_forw_mv_table , mv_table_size * 2 * sizeof(INT16))
CHECKED_ALLOCZ(s->b_bidir_back_mv_table , mv_table_size * 2 * sizeof(INT16))
- CHECKED_ALLOCZ(s->b_direct_forw_mv_table, mv_table_size * 2 * sizeof(INT16))
- CHECKED_ALLOCZ(s->b_direct_back_mv_table, mv_table_size * 2 * sizeof(INT16))
CHECKED_ALLOCZ(s->b_direct_mv_table , mv_table_size * 2 * sizeof(INT16))
//FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
- CHECKED_ALLOCZ(s->me_scratchpad, s->width*2*16*3*sizeof(uint8_t))
+ CHECKED_ALLOCZ(s->me.scratchpad, s->width*2*16*3*sizeof(uint8_t))
- CHECKED_ALLOCZ(s->me_map , ME_MAP_SIZE*sizeof(uint32_t))
- CHECKED_ALLOCZ(s->me_score_map, ME_MAP_SIZE*sizeof(uint16_t))
+ CHECKED_ALLOCZ(s->me.map , ME_MAP_SIZE*sizeof(uint32_t))
+ CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
if(s->codec_id==CODEC_ID_MPEG4){
CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
@@ -498,8 +491,6 @@ void MPV_common_end(MpegEncContext *s)
av_freep(&s->b_back_mv_table);
av_freep(&s->b_bidir_forw_mv_table);
av_freep(&s->b_bidir_back_mv_table);
- av_freep(&s->b_direct_forw_mv_table);
- av_freep(&s->b_direct_back_mv_table);
av_freep(&s->b_direct_mv_table);
av_freep(&s->motion_val);
av_freep(&s->dc_val[0]);
@@ -508,9 +499,9 @@ void MPV_common_end(MpegEncContext *s)
av_freep(&s->mbintra_table);
av_freep(&s->cbp_table);
av_freep(&s->pred_dir_table);
- av_freep(&s->me_scratchpad);
- av_freep(&s->me_map);
- av_freep(&s->me_score_map);
+ av_freep(&s->me.scratchpad);
+ av_freep(&s->me.map);
+ av_freep(&s->me.score_map);
av_freep(&s->mbskip_table);
av_freep(&s->bitstream_buffer);
@@ -566,6 +557,7 @@ int MPV_encode_init(AVCodecContext *avctx)
s->chroma_elim_threshold= avctx->chroma_elim_threshold;
s->strict_std_compliance= avctx->strict_std_compliance;
s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
+ s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
s->mpeg_quant= avctx->mpeg_quant;
if (s->gop_size <= 1) {
@@ -575,12 +567,7 @@ int MPV_encode_init(AVCodecContext *avctx)
s->intra_only = 0;
}
- /* ME algorithm */
- if (avctx->me_method == 0)
- /* For compatibility */
- s->me_method = motion_estimation_method;
- else
- s->me_method = avctx->me_method;
+ s->me_method = avctx->me_method;
/* Fixed QSCALE */
s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);
@@ -713,13 +700,14 @@ int MPV_encode_init(AVCodecContext *avctx)
}
}
}
- s->mv_penalty= default_mv_penalty;
+ s->me.mv_penalty= default_mv_penalty;
s->fcode_tab= default_fcode_tab;
s->y_dc_scale_table=
s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
/* dont use mv_penalty table for crap MV as it would be confused */
- if (s->me_method < ME_EPZS) s->mv_penalty = default_mv_penalty;
+ //FIXME remove after fixing / removing old ME
+ if (s->me_method < ME_EPZS) s->me.mv_penalty = default_mv_penalty;
s->encoding = 1;
@@ -727,6 +715,8 @@ int MPV_encode_init(AVCodecContext *avctx)
if (MPV_common_init(s) < 0)
return -1;
+ ff_init_me(s);
+
#ifdef CONFIG_ENCODERS
if (s->out_format == FMT_H263)
h263_encode_init(s);
@@ -947,6 +937,18 @@ void MPV_frame_end(MpegEncContext *s)
if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/)
s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
}
+ if(s->avctx->debug&FF_DEBUG_SKIP){
+ int x,y;
+ for(y=0; y<s->mb_height; y++){
+ for(x=0; x<s->mb_width; x++){
+ int count= s->mbskip_table[x + y*s->mb_width];
+ if(count>9) count=9;
+ printf(" %1d", count);
+ }
+ printf("\n");
+ }
+ printf("pict type: %d\n", s->pict_type);
+ }
}
static int get_sae(uint8_t *src, int ref, int stride){
@@ -1284,7 +1286,7 @@ static inline void gmc1_motion(MpegEncContext *s,
if(s->flags&CODEC_FLAG_EMU_EDGE){
if(src_x<0 || src_y<0 || src_x + (motion_x&15) + 16 > s->h_edge_pos
|| src_y + (motion_y&15) + 16 > s->v_edge_pos){
- emulated_edge_mc(s, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
+ ff_emulated_edge_mc(s, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
ptr= s->edge_emu_buffer;
emu=1;
}
@@ -1322,14 +1324,14 @@ static inline void gmc1_motion(MpegEncContext *s,
offset = (src_y * uvlinesize) + src_x + (src_offset>>1);
ptr = ref_picture[1] + offset;
if(emu){
- emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+ ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer;
}
s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
ptr = ref_picture[2] + offset;
if(emu){
- emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+ ff_emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer;
}
s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
@@ -1401,7 +1403,7 @@ static inline void gmc_motion(MpegEncContext *s,
}
-static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h,
+void ff_emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h,
int src_x, int src_y, int w, int h){
int x, y;
int start_y, start_x, end_y, end_x;
@@ -1501,7 +1503,7 @@ if(s->quarter_sample)
if(s->flags&CODEC_FLAG_EMU_EDGE){
if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos
|| src_y + (motion_y&1) + h > v_edge_pos){
- emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based,
+ ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based,
src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
ptr= s->edge_emu_buffer + src_offset;
emu=1;
@@ -1538,7 +1540,7 @@ if(s->quarter_sample)
offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
ptr = ref_picture[1] + offset;
if(emu){
- emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based,
+ ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based,
src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer + (src_offset >> 1);
}
@@ -1546,7 +1548,7 @@ if(s->quarter_sample)
ptr = ref_picture[2] + offset;
if(emu){
- emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based,
+ ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based,
src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer + (src_offset >> 1);
}
@@ -1586,7 +1588,7 @@ static inline void qpel_motion(MpegEncContext *s,
if(s->flags&CODEC_FLAG_EMU_EDGE){
if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 16 > s->h_edge_pos
|| src_y + (motion_y&3) + h > v_edge_pos){
- emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based,
+ ff_emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based,
src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
ptr= s->edge_emu_buffer + src_offset;
emu=1;
@@ -1631,7 +1633,7 @@ static inline void qpel_motion(MpegEncContext *s,
offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
ptr = ref_picture[1] + offset;
if(emu){
- emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based,
+ ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based,
src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer + (src_offset >> 1);
}
@@ -1639,7 +1641,7 @@ static inline void qpel_motion(MpegEncContext *s,
ptr = ref_picture[2] + offset;
if(emu){
- emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based,
+ ff_emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based,
src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer + (src_offset >> 1);
}
@@ -1675,6 +1677,10 @@ static inline void MPV_motion(MpegEncContext *s,
ref_picture, 0,
0, pix_op, qpix_op,
s->mv[dir][0][0], s->mv[dir][0][1], 16);
+ }else if(s->mspel){
+ ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
+ ref_picture, pix_op,
+ s->mv[dir][0][0], s->mv[dir][0][1], 16);
}else{
mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
ref_picture, 0,
@@ -1706,7 +1712,7 @@ static inline void MPV_motion(MpegEncContext *s,
if(s->flags&CODEC_FLAG_EMU_EDGE){
if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos
|| src_y + (motion_y&3) + 8 > s->v_edge_pos){
- emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
+ ff_emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
ptr= s->edge_emu_buffer;
}
}
@@ -1737,7 +1743,7 @@ static inline void MPV_motion(MpegEncContext *s,
if(s->flags&CODEC_FLAG_EMU_EDGE){
if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos
|| src_y + (motion_y&1) + 8 > s->v_edge_pos){
- emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
+ ff_emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
ptr= s->edge_emu_buffer;
}
}
@@ -1784,7 +1790,7 @@ static inline void MPV_motion(MpegEncContext *s,
if(s->flags&CODEC_FLAG_EMU_EDGE){
if(src_x<0 || src_y<0 || src_x + (dxy &1) + 8 > s->h_edge_pos>>1
|| src_y + (dxy>>1) + 8 > s->v_edge_pos>>1){
- emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+ ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer;
emu=1;
}
@@ -1793,7 +1799,7 @@ static inline void MPV_motion(MpegEncContext *s,
ptr = ref_picture[2] + offset;
if(emu){
- emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+ ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
ptr= s->edge_emu_buffer;
}
pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
@@ -1928,7 +1934,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
/* update motion predictor, not for B-frames as they need the motion_val from the last P/S-Frame */
if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE) { //FIXME move into h263.c if possible, format specific stuff shouldnt be here
-
+ //FIXME a lot of that is only needed for !low_delay
const int wrap = s->block_wrap[0];
const int xy = s->block_index[0];
const int mb_index= s->mb_x + s->mb_y*s->mb_width;
@@ -2064,7 +2070,7 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
add_dequant_dct(s, block[4], 4, dest_cb, s->uvlinesize);
add_dequant_dct(s, block[5], 5, dest_cr, s->uvlinesize);
}
- } else {
+ } else if(s->codec_id != CODEC_ID_WMV2){
add_dct(s, block[0], 0, dest_y, dct_linesize);
add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
@@ -2074,6 +2080,8 @@ void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
add_dct(s, block[4], 4, dest_cb, s->uvlinesize);
add_dct(s, block[5], 5, dest_cr, s->uvlinesize);
}
+ } else{
+ ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
}
} else {
/* dct only in intra block */
@@ -2376,7 +2384,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
- emulated_edge_mc(s, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
+ ff_emulated_edge_mc(s, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
ptr= s->edge_emu_buffer;
emu=1;
}
@@ -2408,14 +2416,14 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
int wrap_c = s->uvlinesize;
ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
if(emu){
- emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
+ ff_emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
ptr= s->edge_emu_buffer;
}
s->dsp.get_pixels(s->block[4], ptr, wrap_c);
ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
if(emu){
- emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
+ ff_emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
ptr= s->edge_emu_buffer;
}
s->dsp.get_pixels(s->block[5], ptr, wrap_c);
@@ -2455,7 +2463,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
}
if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
- emulated_edge_mc(s, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
+ ff_emulated_edge_mc(s, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
ptr_y= s->edge_emu_buffer;
emu=1;
}
@@ -2487,12 +2495,12 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
skip_dct[5]= 1;
}else{
if(emu){
- emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
+ ff_emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
ptr_cb= s->edge_emu_buffer;
}
s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
if(emu){
- emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
+ ff_emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
ptr_cr= s->edge_emu_buffer;
}
s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
@@ -2574,21 +2582,25 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
#ifdef CONFIG_ENCODERS
/* huffman encode */
- switch(s->out_format) {
- case FMT_MPEG1:
- mpeg1_encode_mb(s, s->block, motion_x, motion_y);
- break;
- case FMT_H263:
- if (s->h263_msmpeg4)
- msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
- else if(s->h263_pred)
- mpeg4_encode_mb(s, s->block, motion_x, motion_y);
- else
- h263_encode_mb(s, s->block, motion_x, motion_y);
- break;
- case FMT_MJPEG:
- mjpeg_encode_mb(s, s->block);
- break;
+ switch(s->codec_id){ //FIXME funct ptr could be slightly faster
+ case CODEC_ID_MPEG1VIDEO:
+ mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
+ case CODEC_ID_MPEG4:
+ mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
+ case CODEC_ID_MSMPEG4V2:
+ case CODEC_ID_MSMPEG4V3:
+ case CODEC_ID_WMV1:
+ msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
+ case CODEC_ID_WMV2:
+ ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
+ case CODEC_ID_MJPEG:
+ mjpeg_encode_mb(s, s->block); break;
+ case CODEC_ID_H263:
+ case CODEC_ID_H263P:
+ case CODEC_ID_RV10:
+ h263_encode_mb(s, s->block, motion_x, motion_y); break;
+ default:
+ assert(0);
}
#endif
}
@@ -2704,13 +2716,18 @@ static inline int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, in
int x,y;
if(w==16 && h==16)
- return s->dsp.pix_norm(src1, src2, stride);
+ return s->dsp.sse[0](NULL, src1, src2, stride);
+ else if(w==8 && h==8)
+ return s->dsp.sse[1](NULL, src1, src2, stride);
for(y=0; y<h; y++){
for(x=0; x<w; x++){
acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
}
}
+
+ assert(acc>=0);
+
return acc;
}
@@ -2751,6 +2768,18 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->scene_change_score=0;
s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME ratedistoration
+
+ if(s->msmpeg4_version){
+ if(s->pict_type==I_TYPE)
+ s->no_rounding=1;
+ else if(s->flipflop_rounding)
+ s->no_rounding ^= 1;
+ }else{
+ if(s->pict_type==I_TYPE)
+ s->no_rounding=0;
+ else if(s->pict_type!=B_TYPE)
+ s->no_rounding ^= 1;
+ }
/* Estimate motion for every MB */
if(s->pict_type != I_TYPE){
@@ -2772,7 +2801,6 @@ static void encode_picture(MpegEncContext *s, int picture_number)
ff_estimate_b_frame_motion(s, mb_x, mb_y);
else
ff_estimate_p_frame_motion(s, mb_x, mb_y);
-// s->mb_type[mb_y*s->mb_width + mb_x]=MB_TYPE_INTER;
}
}
}else /* if(s->pict_type == I_TYPE) */{
@@ -2867,7 +2895,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
mjpeg_picture_header(s);
break;
case FMT_H263:
- if (s->h263_msmpeg4)
+ if (s->codec_id == CODEC_ID_WMV2)
+ ff_wmv2_encode_picture_header(s, picture_number);
+ else if (s->h263_msmpeg4)
msmpeg4_encode_picture_header(s, picture_number);
else if (s->h263_pred)
mpeg4_encode_picture_header(s, picture_number);
@@ -3049,15 +3079,14 @@ static void encode_picture(MpegEncContext *s, int picture_number)
&dmin, &next_block, 0, 0);
}
if(mb_type&MB_TYPE_DIRECT){
+ int mx= s->b_direct_mv_table[xy][0];
+ int my= s->b_direct_mv_table[xy][1];
+
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
- s->mv_type = MV_TYPE_16X16; //FIXME
s->mb_intra= 0;
- s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0];
- s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1];
- s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0];
- s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1];
+ ff_mpeg4_set_direct_mv(s, mx, my);
encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb,
- &dmin, &next_block, s->b_direct_mv_table[xy][0], s->b_direct_mv_table[xy][1]);
+ &dmin, &next_block, mx, my);
}
if(mb_type&MB_TYPE_INTRA){
s->mv_dir = MV_DIR_FORWARD;
@@ -3122,10 +3151,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
s->mb_intra= 0;
motion_x=s->b_direct_mv_table[xy][0];
motion_y=s->b_direct_mv_table[xy][1];
- s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0];
- s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1];
- s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0];
- s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1];
+ ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
break;
case MB_TYPE_BIDIR:
s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
@@ -3170,7 +3196,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
-
+
s->current_picture.error[0] += sse(
s,
s->new_picture .data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
@@ -3471,6 +3497,7 @@ char ff_get_pict_type_char(int pict_type){
case P_TYPE: return 'P';
case B_TYPE: return 'B';
case S_TYPE: return 'S';
+ default: return '?';
}
}
@@ -3574,12 +3601,3 @@ AVCodec wmv1_encoder = {
MPV_encode_end,
};
-AVCodec wmv2_encoder = {
- "wmv2",
- CODEC_TYPE_VIDEO,
- CODEC_ID_WMV2,
- sizeof(MpegEncContext),
- MPV_encode_init,
- MPV_encode_picture,
- MPV_encode_end,
-};
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index ca0054418f..84e28f9d76 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -129,6 +129,31 @@ typedef struct ParseContext{
int frame_start_found;
} ParseContext;
+struct MpegEncContext;
+/*
+ * State of the motion estimation code, embedded in MpegEncContext as the
+ * member "me".  It gathers fields that were previously stored directly in
+ * MpegEncContext (me_scratchpad, me_map, me_score_map, me_map_generation,
+ * skip_me, mv_penalty) together with the search function pointers.
+ */
+typedef struct MotionEstContext{
+ int skip; /* set if ME is skipped for the current MB */
+ int co_located_mv[4][2]; /* mv from last p frame for direct mode ME */
+ int direct_basis_mv[4][2]; /* NOTE(review): no use visible in this part of the patch; presumably base vectors for direct mode ME -- confirm */
+ uint8_t *scratchpad; /* data area for the me algo, so that the ME doesn't need to malloc/free */
+ uint32_t *map; /* map to avoid duplicate evaluations */
+ uint32_t *score_map; /* map to store the scores */
+ int map_generation; /* NOTE(review): presumably the counter behind update_map_generation(); its value is mixed into the keys stored in map -- confirm */
+ int penalty_factor; /* multiplier applied to the mv_penalty bit counts during the full-pel search */
+ int sub_penalty_factor; /* NOTE(review): presumably the same multiplier for the sub-pel search -- confirm */
+ UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV */
+ int (*sub_motion_search)(struct MpegEncContext * s, /* sub-pel refinement routine; NOTE(review): presumably installed by ff_init_me() -- confirm */
+ int *mx_ptr, int *my_ptr, int dmin,
+ int xmin, int ymin, int xmax, int ymax,
+ int pred_x, int pred_y, Picture *ref_picture,
+ int n, int size, uint16_t * const mv_penalty);
+ int (*motion_search[7])(struct MpegEncContext * s, int block, /* full-pel search routines; NOTE(review): meaning of the 7 slots is not visible here -- confirm */
+ int *mx_ptr, int *my_ptr,
+ int P[10][2], int pred_x, int pred_y,
+ int xmin, int ymin, int xmax, int ymax, Picture *ref_picture,
+ uint16_t * const mv_penalty);
+}MotionEstContext;
+
typedef struct MpegEncContext {
struct AVCodecContext *avctx;
/* the following parameters must be initialized before encoding */
@@ -222,15 +247,8 @@ typedef struct MpegEncContext {
INT16 (*b_back_mv_table)[2]; /* MV table (1MV per MB) backward mode b-frame encoding */
INT16 (*b_bidir_forw_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */
INT16 (*b_bidir_back_mv_table)[2]; /* MV table (1MV per MB) bidir mode b-frame encoding */
- INT16 (*b_direct_forw_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */
- INT16 (*b_direct_back_mv_table)[2];/* MV table (1MV per MB) direct mode b-frame encoding */
INT16 (*b_direct_mv_table)[2]; /* MV table (1MV per MB) direct mode b-frame encoding */
int me_method; /* ME algorithm */
- uint8_t *me_scratchpad; /* data area for the me algo, so that the ME doesnt need to malloc/free */
- uint32_t *me_map; /* map to avoid duplicate evaluations */
- uint16_t *me_score_map; /* map to store the SADs */
- int me_map_generation;
- int skip_me; /* set if ME is skiped for the current MB */
int scene_change_score;
int mv_dir;
#define MV_DIR_BACKWARD 1
@@ -250,8 +268,9 @@ typedef struct MpegEncContext {
int mv[2][4][2];
int field_select[2][2];
int last_mv[2][2][2]; /* last MV, used for MV prediction in MPEG1 & B-frame MPEG4 */
- UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV, used for ME */
UINT8 *fcode_tab; /* smallest fcode needed for each MV */
+
+ MotionEstContext me;
int no_rounding; /* apply no rounding to motion compensation (MPEG4, msmpeg4, ...)
for b-frames rounding mode is allways 0 */
@@ -458,6 +477,7 @@ typedef struct MpegEncContext {
/* [mb_intra][isChroma][level][run][last] */
int (*ac_stats)[2][MAX_LEVEL+1][MAX_RUN+1][2];
int inter_intra_pred;
+ int mspel;
/* decompression specific */
GetBitContext gb;
@@ -519,6 +539,7 @@ typedef struct MpegEncContext {
void (*fdct)(DCTELEM *block/* align 16*/);
void (*idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
void (*idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+ //FIXME move above funcs into dspContext perhaps
} MpegEncContext;
@@ -528,6 +549,9 @@ void MPV_common_end(MpegEncContext *s);
void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx);
void MPV_frame_end(MpegEncContext *s);
+int MPV_encode_init(AVCodecContext *avctx);
+int MPV_encode_end(AVCodecContext *avctx);
+int MPV_encode_picture(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data);
#ifdef HAVE_MMX
void MPV_common_init_mmx(MpegEncContext *s);
#endif
@@ -553,6 +577,8 @@ void ff_clean_intra_table_entries(MpegEncContext *s);
void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable);
void ff_error_resilience(MpegEncContext *s);
void ff_draw_horiz_band(MpegEncContext *s);
+void ff_emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h,
+ int src_x, int src_y, int w, int h);
char ff_get_pict_type_char(int pict_type);
@@ -585,6 +611,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type);
void ff_fix_long_p_mvs(MpegEncContext * s);
void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type);
+void ff_init_me(MpegEncContext *s);
/* mpeg12.c */
@@ -631,6 +658,11 @@ extern UINT8 ff_mpeg4_y_dc_scale_table[32];
extern UINT8 ff_mpeg4_c_dc_scale_table[32];
extern const INT16 ff_mpeg4_default_intra_matrix[64];
extern const INT16 ff_mpeg4_default_non_intra_matrix[64];
+int ff_h263_decode_init(AVCodecContext *avctx);
+int ff_h263_decode_frame(AVCodecContext *avctx,
+ void *data, int *data_size,
+ UINT8 *buf, int buf_size);
+int ff_h263_decode_end(AVCodecContext *avctx);
void h263_encode_mb(MpegEncContext *s,
DCTELEM block[6][64],
int motion_x, int motion_y);
@@ -667,6 +699,7 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s);
int ff_mpeg4_get_video_packet_prefix_length(MpegEncContext *s);
int ff_h263_resync(MpegEncContext *s);
int ff_h263_get_gob_height(MpegEncContext *s);
+void ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my);
/* rv10.c */
@@ -684,7 +717,16 @@ int msmpeg4_decode_picture_header(MpegEncContext * s);
int msmpeg4_decode_ext_header(MpegEncContext * s, int buf_size);
int ff_msmpeg4_decode_init(MpegEncContext *s);
void ff_msmpeg4_encode_init(MpegEncContext *s);
-
+int ff_wmv2_decode_picture_header(MpegEncContext * s);
+void ff_wmv2_add_mb(MpegEncContext *s, DCTELEM block[6][64], uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr);
+void ff_mspel_motion(MpegEncContext *s,
+ UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
+ UINT8 **ref_picture, op_pixels_func (*pix_op)[4],
+ int motion_x, int motion_y, int h);
+int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number);
+void ff_wmv2_encode_mb(MpegEncContext * s,
+ DCTELEM block[6][64],
+ int motion_x, int motion_y);
/* mjpegenc.c */
int mjpeg_init(MpegEncContext *s);
diff --git a/libavcodec/msmpeg4.c b/libavcodec/msmpeg4.c
index 76fa3c2d49..a08418874c 100644
--- a/libavcodec/msmpeg4.c
+++ b/libavcodec/msmpeg4.c
@@ -48,12 +48,14 @@
#define II_BITRATE 128*1024
#define MBAC_BITRATE 50*1024
+#define DEFAULT_INTER_INDEX 3
+
static UINT32 v2_dc_lum_table[512][2];
static UINT32 v2_dc_chroma_table[512][2];
static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n);
static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
- int n, int coded);
+ int n, int coded, const uint8_t *scantable);
static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr);
static int msmpeg4_decode_motion(MpegEncContext * s,
int *mx_ptr, int *my_ptr);
@@ -63,6 +65,7 @@ static inline void msmpeg4_memsetw(short *tab, int val, int n);
static int get_size_of_code(MpegEncContext * s, RLTable *rl, int last, int run, int level, int intra);
static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
+static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
extern UINT32 inverse[256];
@@ -160,13 +163,14 @@ static void common_init(MpegEncContext * s)
}
break;
case 4:
+ case 5:
s->y_dc_scale_table= wmv1_y_dc_scale_table;
s->c_dc_scale_table= wmv1_c_dc_scale_table;
break;
}
- if(s->msmpeg4_version==4){
+ if(s->msmpeg4_version>=4){
ff_init_scantable(s, &s->intra_scantable , wmv1_scantable[1]);
ff_init_scantable(s, &s->intra_h_scantable, wmv1_scantable[2]);
ff_init_scantable(s, &s->intra_v_scantable, wmv1_scantable[3]);
@@ -370,9 +374,9 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
s->per_mb_rl_table = 0;
if(s->msmpeg4_version==4)
s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE && s->pict_type==P_TYPE);
+//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
if (s->pict_type == I_TYPE) {
- s->no_rounding = 1;
s->slice_height= s->mb_height/1;
put_bits(&s->pb, 5, 0x16 + s->mb_height/s->slice_height);
@@ -404,12 +408,6 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
put_bits(&s->pb, 1, s->mv_table_index);
}
-
- if(s->flipflop_rounding){
- s->no_rounding ^= 1;
- }else{
- s->no_rounding = 0;
- }
}
s->esc3_level_length= 0;
@@ -923,7 +921,7 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
}
/* recalculate block_last_index for M$ wmv1 */
- if(s->msmpeg4_version==4 && s->block_last_index[n]>0){
+ if(s->msmpeg4_version>=4 && s->block_last_index[n]>0){
for(last_index=63; last_index>=0; last_index--){
if(block[scantable[last_index]]) break;
}
@@ -975,7 +973,7 @@ else
/* third escape */
put_bits(&s->pb, 1, 0);
put_bits(&s->pb, 1, last);
- if(s->msmpeg4_version==4){
+ if(s->msmpeg4_version>=4){
if(s->esc3_level_length==0){
s->esc3_level_length=8;
s->esc3_run_length= 6;
@@ -1014,7 +1012,7 @@ else
/****************************************/
/* decoding stuff */
-static VLC mb_non_intra_vlc;
+static VLC mb_non_intra_vlc[4];
static VLC mb_intra_vlc;
static VLC dc_lum_vlc[2];
static VLC dc_chroma_vlc[2];
@@ -1139,9 +1137,12 @@ int ff_msmpeg4_decode_init(MpegEncContext *s)
&mvtab[0][1], 2, 1,
&mvtab[0][0], 2, 1);
- init_vlc(&mb_non_intra_vlc, MB_NON_INTRA_VLC_BITS, 128,
- &table_mb_non_intra[0][1], 8, 4,
- &table_mb_non_intra[0][0], 8, 4);
+ for(i=0; i<4; i++){
+ init_vlc(&mb_non_intra_vlc[i], MB_NON_INTRA_VLC_BITS, 128,
+ &wmv2_inter_table[i][0][1], 8, 4,
+ &wmv2_inter_table[i][0][0], 8, 4); //FIXME name?
+ }
+
init_vlc(&mb_intra_vlc, MB_INTRA_VLC_BITS, 64,
&table_mb_intra[0][1], 4, 2,
&table_mb_intra[0][0], 4, 2);
@@ -1167,6 +1168,9 @@ int ff_msmpeg4_decode_init(MpegEncContext *s)
case 4:
s->decode_mb= msmpeg4v34_decode_mb;
break;
+ case 5:
+ s->decode_mb= wmv2_decode_mb;
+ break;
}
s->slice_height= s->mb_height; //to avoid 1/0 if the first frame isnt a keyframe
@@ -1334,6 +1338,7 @@ return -1;
s->no_rounding = 0;
}
}
+//printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
s->esc3_level_length= 0;
s->esc3_run_length= 0;
@@ -1523,7 +1528,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
}
for (i = 0; i < 6; i++) {
- if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
+ if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
{
fprintf(stderr,"\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
return -1;
@@ -1566,7 +1571,7 @@ printf("S ");
}
}
- code = get_vlc2(&s->gb, mb_non_intra_vlc.table, MB_NON_INTRA_VLC_BITS, 3);
+ code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3);
if (code < 0)
return -1;
//s->mb_intra = (code & 0x40) ? 0 : 1;
@@ -1628,7 +1633,7 @@ printf("%c", s->ac_pred ? 'A' : 'I');
}
for (i = 0; i < 6; i++) {
- if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1) < 0)
+ if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
{
fprintf(stderr,"\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
return -1;
@@ -1639,13 +1644,12 @@ printf("%c", s->ac_pred ? 'A' : 'I');
}
//#define ERROR_DETAILS
static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
- int n, int coded)
+ int n, int coded, const uint8_t *scan_table)
{
int level, i, last, run, run_diff;
int dc_pred_dir;
RLTable *rl;
RL_VLC_ELEM *rl_vlc;
- const UINT8 *scan_table;
int qmul, qadd;
if (s->mb_intra) {
@@ -1713,7 +1717,8 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
s->block_last_index[n] = i;
return 0;
}
- scan_table = s->inter_scantable.permutated;
+ if(!scan_table)
+ scan_table = s->inter_scantable.permutated;
set_stat(ST_INTER_AC);
rl_vlc= rl->rl_vlc[s->qscale];
}
@@ -1889,7 +1894,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
i = 63; /* XXX: not optimal */
}
}
- if(s->msmpeg4_version==4 && i>0) i=63; //FIXME/XXX optimize
+ if(s->msmpeg4_version>=4 && i>0) i=63; //FIXME/XXX optimize
s->block_last_index[n] = i;
return 0;
@@ -1990,3 +1995,9 @@ static int msmpeg4_decode_motion(MpegEncContext * s,
*my_ptr = my;
return 0;
}
+
+/* cleanest way to support it
+ * there is too much shared between versions so that we can't have 1 file per version & 1 common
+ * as almost everything would be in the common file
+ */
+#include "wmv2.c"
diff --git a/libavcodec/msmpeg4data.h b/libavcodec/msmpeg4data.h
index 93a72c54c1..3490fc08c4 100644
--- a/libavcodec/msmpeg4data.h
+++ b/libavcodec/msmpeg4data.h
@@ -3,7 +3,7 @@
*/
/* intra picture macro block coded block pattern */
-static const UINT16 table_mb_intra[64][2] = {
+static const uint16_t table_mb_intra[64][2] = {
{ 0x1, 1 },{ 0x17, 6 },{ 0x9, 5 },{ 0x5, 5 },
{ 0x6, 5 },{ 0x47, 9 },{ 0x20, 7 },{ 0x10, 7 },
{ 0x2, 5 },{ 0x7c, 9 },{ 0x3a, 7 },{ 0x1d, 7 },
@@ -23,7 +23,7 @@ static const UINT16 table_mb_intra[64][2] = {
};
/* non intra picture macro block coded block pattern + mb type */
-static const UINT32 table_mb_non_intra[128][2] = {
+static const uint32_t table_mb_non_intra[128][2] = {
{ 0x40, 7 },{ 0x13c9, 13 },{ 0x9fd, 12 },{ 0x1fc, 15 },
{ 0x9fc, 12 },{ 0xa83, 18 },{ 0x12d34, 17 },{ 0x83bc, 16 },
{ 0x83a, 12 },{ 0x7f8, 17 },{ 0x3fd, 16 },{ 0x3ff, 16 },
@@ -60,7 +60,7 @@ static const UINT32 table_mb_non_intra[128][2] = {
/* dc table 0 */
-static const UINT32 table0_dc_lum[120][2] = {
+static const uint32_t table0_dc_lum[120][2] = {
{ 0x1, 1 },{ 0x1, 2 },{ 0x1, 4 },{ 0x1, 5 },
{ 0x5, 5 },{ 0x7, 5 },{ 0x8, 6 },{ 0xc, 6 },
{ 0x0, 7 },{ 0x2, 7 },{ 0x12, 7 },{ 0x1a, 7 },
@@ -93,7 +93,7 @@ static const UINT32 table0_dc_lum[120][2] = {
{ 0x6078c, 24 },{ 0x6078d, 24 },{ 0x6078e, 24 },{ 0x6078f, 24 },
};
-static const UINT32 table0_dc_chroma[120][2] = {
+static const uint32_t table0_dc_chroma[120][2] = {
{ 0x0, 2 },{ 0x1, 2 },{ 0x5, 3 },{ 0x9, 4 },
{ 0xd, 4 },{ 0x11, 5 },{ 0x1d, 5 },{ 0x1f, 5 },
{ 0x21, 6 },{ 0x31, 6 },{ 0x38, 6 },{ 0x33, 6 },
@@ -128,7 +128,7 @@ static const UINT32 table0_dc_chroma[120][2] = {
/* dc table 1 */
-static const UINT32 table1_dc_lum[120][2] = {
+static const uint32_t table1_dc_lum[120][2] = {
{ 0x2, 2 },{ 0x3, 2 },{ 0x3, 3 },{ 0x2, 4 },
{ 0x5, 4 },{ 0x1, 5 },{ 0x3, 5 },{ 0x8, 5 },
{ 0x0, 6 },{ 0x5, 6 },{ 0xd, 6 },{ 0xf, 6 },
@@ -161,7 +161,7 @@ static const UINT32 table1_dc_lum[120][2] = {
{ 0x1e6964, 26 },{ 0x1e6965, 26 },{ 0x1e6966, 26 },{ 0x1e6967, 26 },
};
-static const UINT32 table1_dc_chroma[120][2] = {
+static const uint32_t table1_dc_chroma[120][2] = {
{ 0x0, 2 },{ 0x1, 2 },{ 0x4, 3 },{ 0x7, 3 },
{ 0xb, 4 },{ 0xd, 4 },{ 0x15, 5 },{ 0x28, 6 },
{ 0x30, 6 },{ 0x32, 6 },{ 0x52, 7 },{ 0x62, 7 },
@@ -196,7 +196,7 @@ static const UINT32 table1_dc_chroma[120][2] = {
/* vlc table 0, for intra luma */
-static const UINT16 table0_vlc[133][2] = {
+static const uint16_t table0_vlc[133][2] = {
{ 0x1, 2 },{ 0x6, 3 },{ 0xf, 4 },{ 0x16, 5 },
{ 0x20, 6 },{ 0x18, 7 },{ 0x8, 8 },{ 0x9a, 8 },
{ 0x56, 9 },{ 0x13e, 9 },{ 0xf0, 10 },{ 0x3a5, 10 },
@@ -233,7 +233,7 @@ static const UINT16 table0_vlc[133][2] = {
{ 0x16, 7 },
};
-static const INT8 table0_level[132] = {
+static const int8_t table0_level[132] = {
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
1, 2, 3, 4, 5, 6, 7, 8,
@@ -253,7 +253,7 @@ static const INT8 table0_level[132] = {
1, 1, 1, 1,
};
-static const INT8 table0_run[132] = {
+static const int8_t table0_run[132] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1,
@@ -275,7 +275,7 @@ static const INT8 table0_run[132] = {
/* vlc table 1, for intra chroma and P macro blocks */
-static const UINT16 table1_vlc[149][2] = {
+static const uint16_t table1_vlc[149][2] = {
{ 0x4, 3 },{ 0x14, 5 },{ 0x17, 7 },{ 0x7f, 8 },
{ 0x154, 9 },{ 0x1f2, 10 },{ 0xbf, 11 },{ 0x65, 12 },
{ 0xaaa, 12 },{ 0x630, 13 },{ 0x1597, 13 },{ 0x3b7, 14 },
@@ -316,7 +316,7 @@ static const UINT16 table1_vlc[149][2] = {
{ 0xd, 9 },
};
-static const INT8 table1_level[148] = {
+static const int8_t table1_level[148] = {
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 1, 2,
3, 4, 5, 6, 7, 8, 9, 1,
@@ -338,7 +338,7 @@ static const INT8 table1_level[148] = {
1, 1, 1, 1,
};
-static const INT8 table1_run[148] = {
+static const int8_t table1_run[148] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 2,
@@ -362,7 +362,7 @@ static const INT8 table1_run[148] = {
/* third vlc table */
-static const UINT16 table2_vlc[186][2] = {
+static const uint16_t table2_vlc[186][2] = {
{ 0x1, 2 },{ 0x5, 3 },{ 0xd, 4 },{ 0x12, 5 },
{ 0xe, 6 },{ 0x15, 7 },{ 0x13, 8 },{ 0x3f, 8 },
{ 0x4b, 9 },{ 0x11f, 9 },{ 0xb8, 10 },{ 0x3e3, 10 },
@@ -412,7 +412,7 @@ static const UINT16 table2_vlc[186][2] = {
{ 0x23dc, 14 },{ 0x4a, 9 },
};
-static const INT8 table2_level[185] = {
+static const int8_t table2_level[185] = {
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 1, 2, 3, 4, 5,
@@ -439,7 +439,7 @@ static const INT8 table2_level[185] = {
1,
};
-static const INT8 table2_run[185] = {
+static const int8_t table2_run[185] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 1, 1, 1, 1,
@@ -467,7 +467,7 @@ static const INT8 table2_run[185] = {
};
/* second non intra vlc table */
-static const UINT16 table4_vlc[169][2] = {
+static const uint16_t table4_vlc[169][2] = {
{ 0x0, 3 },{ 0x3, 4 },{ 0xb, 5 },{ 0x14, 6 },
{ 0x3f, 6 },{ 0x5d, 7 },{ 0xa2, 8 },{ 0xac, 9 },
{ 0x16e, 9 },{ 0x20a, 10 },{ 0x2e2, 10 },{ 0x432, 11 },
@@ -513,7 +513,7 @@ static const UINT16 table4_vlc[169][2] = {
{ 0x169, 9 },
};
-static const INT8 table4_level[168] = {
+static const int8_t table4_level[168] = {
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 1,
@@ -537,7 +537,7 @@ static const INT8 table4_level[168] = {
1, 1, 1, 1, 1, 1, 1, 1,
};
-static const INT8 table4_run[168] = {
+static const int8_t table4_run[168] = {
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1,
@@ -561,25 +561,25 @@ static const INT8 table4_run[168] = {
29, 30, 31, 32, 33, 34, 35, 36,
};
-extern const UINT16 inter_vlc[103][2];
-extern const INT8 inter_level[102];
-extern const INT8 inter_run[102];
+extern const uint16_t inter_vlc[103][2];
+extern const int8_t inter_level[102];
+extern const int8_t inter_run[102];
-extern const UINT16 intra_vlc[103][2];
-extern const INT8 intra_level[102];
-extern const INT8 intra_run[102];
+extern const uint16_t intra_vlc[103][2];
+extern const int8_t intra_level[102];
+extern const int8_t intra_run[102];
-extern const UINT8 DCtab_lum[13][2];
-extern const UINT8 DCtab_chrom[13][2];
+extern const uint8_t DCtab_lum[13][2];
+extern const uint8_t DCtab_chrom[13][2];
-extern const UINT8 cbpy_tab[16][2];
-extern const UINT8 mvtab[33][2];
+extern const uint8_t cbpy_tab[16][2];
+extern const uint8_t mvtab[33][2];
-extern const UINT8 intra_MCBPC_code[8];
-extern const UINT8 intra_MCBPC_bits[8];
+extern const uint8_t intra_MCBPC_code[8];
+extern const uint8_t intra_MCBPC_bits[8];
-extern const UINT8 inter_MCBPC_code[25];
-extern const UINT8 inter_MCBPC_bits[25];
+extern const uint8_t inter_MCBPC_code[25];
+extern const uint8_t inter_MCBPC_bits[25];
#define NB_RL_TABLES 6
@@ -632,7 +632,7 @@ static RLTable rl_table[NB_RL_TABLES] = {
/* motion vector table 0 */
-static const UINT16 table0_mv_code[1100] = {
+static const uint16_t table0_mv_code[1100] = {
0x0001, 0x0003, 0x0005, 0x0007, 0x0003, 0x0008, 0x000c, 0x0001,
0x0002, 0x001b, 0x0006, 0x000b, 0x0015, 0x0002, 0x000e, 0x000f,
0x0014, 0x0020, 0x0022, 0x0025, 0x0027, 0x0029, 0x002d, 0x004b,
@@ -773,7 +773,7 @@ static const UINT16 table0_mv_code[1100] = {
0x5f0d, 0x5f0e, 0x5f0f, 0x0000,
};
-static const UINT8 table0_mv_bits[1100] = {
+static const uint8_t table0_mv_bits[1100] = {
1, 4, 4, 4, 5, 5, 5, 6,
6, 6, 7, 7, 7, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8,
@@ -914,7 +914,7 @@ static const UINT8 table0_mv_bits[1100] = {
17, 17, 17, 8,
};
-static const UINT8 table0_mvx[1099] = {
+static const uint8_t table0_mvx[1099] = {
32, 32, 31, 32, 33, 31, 33, 31,
33, 32, 34, 32, 30, 32, 31, 34,
35, 32, 34, 33, 29, 33, 30, 30,
@@ -1055,7 +1055,7 @@ static const UINT8 table0_mvx[1099] = {
61, 19, 19,
};
-static const UINT8 table0_mvy[1099] = {
+static const uint8_t table0_mvy[1099] = {
32, 31, 32, 33, 32, 31, 31, 33,
33, 34, 32, 30, 32, 35, 34, 31,
32, 29, 33, 30, 32, 34, 33, 31,
@@ -1197,7 +1197,7 @@ static const UINT8 table0_mvy[1099] = {
};
/* motion vector table 1 */
-static const UINT16 table1_mv_code[1100] = {
+static const uint16_t table1_mv_code[1100] = {
0x0000, 0x0007, 0x0009, 0x000f, 0x000a, 0x0011, 0x001a, 0x001c,
0x0011, 0x0031, 0x0025, 0x002d, 0x002f, 0x006f, 0x0075, 0x0041,
0x004c, 0x004e, 0x005c, 0x0060, 0x0062, 0x0066, 0x0068, 0x0069,
@@ -1338,7 +1338,7 @@ static const UINT16 table1_mv_code[1100] = {
0x2473, 0x26a2, 0x26a3, 0x000b,
};
-static const UINT8 table1_mv_bits[1100] = {
+static const uint8_t table1_mv_bits[1100] = {
2, 4, 4, 4, 5, 5, 5, 5,
6, 6, 7, 7, 7, 7, 7, 8,
8, 8, 8, 8, 8, 8, 8, 8,
@@ -1479,7 +1479,7 @@ static const UINT8 table1_mv_bits[1100] = {
15, 15, 15, 4,
};
-static const UINT8 table1_mvx[1099] = {
+static const uint8_t table1_mvx[1099] = {
32, 31, 32, 31, 33, 32, 33, 33,
31, 34, 30, 32, 32, 34, 35, 32,
34, 33, 29, 30, 30, 32, 31, 31,
@@ -1620,7 +1620,7 @@ static const UINT8 table1_mvx[1099] = {
0, 12, 27,
};
-static const UINT8 table1_mvy[1099] = {
+static const uint8_t table1_mvy[1099] = {
32, 32, 31, 31, 32, 33, 31, 33,
33, 32, 32, 30, 34, 31, 32, 29,
33, 30, 32, 33, 31, 35, 34, 30,
@@ -1764,11 +1764,11 @@ static const UINT8 table1_mvy[1099] = {
/* motion vector table */
typedef struct MVTable {
int n;
- const UINT16 *table_mv_code;
- const UINT8 *table_mv_bits;
- const UINT8 *table_mvx;
- const UINT8 *table_mvy;
- UINT16 *table_mv_index; /* encoding: convert mv to index in table_mv */
+ const uint16_t *table_mv_code;
+ const uint8_t *table_mv_bits;
+ const uint8_t *table_mvx;
+ const uint8_t *table_mvy;
+ uint16_t *table_mv_index; /* encoding: convert mv to index in table_mv */
VLC vlc; /* decoding: vlc */
} MVTable;
@@ -1789,29 +1789,29 @@ static MVTable mv_tables[2] = {
}
};
-static const UINT8 v2_mb_type[8][2] = {
+static const uint8_t v2_mb_type[8][2] = {
{1, 1}, {0 , 2}, {3 , 3}, {9 , 5},
{5, 4}, {0x21, 7}, {0x20, 7}, {0x11, 6},
};
-static const UINT8 v2_intra_cbpc[4][2] = {
+static const uint8_t v2_intra_cbpc[4][2] = {
{1, 1}, {0, 3}, {1, 3}, {1, 2},
};
-static UINT8 wmv1_y_dc_scale_table[32]={
+static uint8_t wmv1_y_dc_scale_table[32]={
// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
0, 8, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21
};
-static UINT8 wmv1_c_dc_scale_table[32]={
+static uint8_t wmv1_c_dc_scale_table[32]={
// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
0, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21,22
};
-static UINT8 old_ff_y_dc_scale_table[32]={
+static uint8_t old_ff_y_dc_scale_table[32]={
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
0, 8, 8, 8, 8,10,12,14,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
};
-static UINT8 old_ff_c_dc_scale_table[32]={
+static uint8_t old_ff_c_dc_scale_table[32]={
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
0, 8, 8, 8, 8, 9, 9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21,22
};
@@ -1819,7 +1819,7 @@ static UINT8 old_ff_c_dc_scale_table[32]={
#define WMV1_SCANTABLE_COUNT 4
-static const UINT8 wmv1_scantable00[64]= {
+static const uint8_t wmv1_scantable00[64]= {
0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11,
0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28,
0x30, 0x38, 0x29, 0x21, 0x1A, 0x13, 0x0C, 0x05,
@@ -1829,7 +1829,7 @@ static const UINT8 wmv1_scantable00[64]= {
0x2C, 0x25, 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x35,
0x3D, 0x3E, 0x36, 0x2E, 0x27, 0x2F, 0x37, 0x3F,
};
-static const UINT8 wmv1_scantable01[64]= {
+static const uint8_t wmv1_scantable01[64]= {
0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11,
0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28,
0x21, 0x30, 0x1A, 0x13, 0x0C, 0x05, 0x06, 0x0D,
@@ -1839,7 +1839,7 @@ static const UINT8 wmv1_scantable01[64]= {
0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3C, 0x35,
0x3D, 0x2E, 0x27, 0x2F, 0x36, 0x3E, 0x37, 0x3F,
};
-static const UINT8 wmv1_scantable02[64]= {
+static const uint8_t wmv1_scantable02[64]= {
0x00, 0x01, 0x08, 0x02, 0x03, 0x09, 0x10, 0x18,
0x11, 0x0A, 0x04, 0x05, 0x0B, 0x12, 0x19, 0x20,
0x28, 0x30, 0x21, 0x1A, 0x13, 0x0C, 0x06, 0x07,
@@ -1849,7 +1849,7 @@ static const UINT8 wmv1_scantable02[64]= {
0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3B, 0x3C, 0x35,
0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F,
};
-static const UINT8 wmv1_scantable03[64]= {
+static const uint8_t wmv1_scantable03[64]= {
0x00, 0x08, 0x10, 0x01, 0x18, 0x20, 0x28, 0x09,
0x02, 0x03, 0x0A, 0x11, 0x19, 0x30, 0x38, 0x29,
0x21, 0x1A, 0x12, 0x0B, 0x04, 0x05, 0x0C, 0x13,
@@ -1860,14 +1860,141 @@ static const UINT8 wmv1_scantable03[64]= {
0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F,
};
-static const UINT8 *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={
+static const uint8_t *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={
wmv1_scantable00,
wmv1_scantable01,
wmv1_scantable02,
wmv1_scantable03,
};
-static UINT8 table_inter_intra[4][2]={
+static const uint8_t table_inter_intra[4][2]={
{0,1},{2,2},{6,3},{7,3}
};
+#define WMV2_INTER_CBP_TABLE_COUNT 4
+
+static const uint32_t table_mb_non_intra2[128][2] = { /* 128 two-element rows; reachable as wmv2_inter_table[0], whose [n][1] and [n][0] columns are handed to init_vlc() in msmpeg4.c -- presumably {code, length} like table_mb_non_intra, TODO confirm */
+{0x0000A7, 14}, {0x01B2B8, 18}, {0x01B28E, 18}, {0x036575, 19},
+{0x006CAC, 16}, {0x000A69, 18}, {0x002934, 20}, {0x00526B, 21},
+{0x006CA1, 16}, {0x01B2B9, 18}, {0x0029AD, 20}, {0x029353, 24},
+{0x006CA7, 16}, {0x006CAB, 16}, {0x01B2BB, 18}, {0x00029B, 16},
+{0x00D944, 17}, {0x000A6A, 18}, {0x0149A8, 23}, {0x03651F, 19},
+{0x006CAF, 16}, {0x000A4C, 18}, {0x03651E, 19}, {0x000A48, 18},
+{0x00299C, 20}, {0x00299F, 20}, {0x029352, 24}, {0x0029AC, 20},
+{0x000296, 16}, {0x00D946, 17}, {0x000A68, 18}, {0x000298, 16},
+{0x000527, 17}, {0x00D94D, 17}, {0x0014D7, 19}, {0x036574, 19},
+{0x000A5C, 18}, {0x01B299, 18}, {0x00299D, 20}, {0x00299E, 20},
+{0x000525, 17}, {0x000A66, 18}, {0x00A4D5, 22}, {0x00149B, 19},
+{0x000295, 16}, {0x006CAD, 16}, {0x000A49, 18}, {0x000521, 17},
+{0x006CAA, 16}, {0x00D945, 17}, {0x01B298, 18}, {0x00052F, 17},
+{0x003654, 15}, {0x006CA0, 16}, {0x000532, 17}, {0x000291, 16},
+{0x003652, 15}, {0x000520, 17}, {0x000A5D, 18}, {0x000294, 16},
+{0x00009B, 11}, {0x0006E2, 12}, {0x000028, 12}, {0x0001B0, 10},
+{0x000001, 3}, {0x000010, 8}, {0x00002F, 6}, {0x00004C, 10},
+{0x00000D, 4}, {0x000000, 10}, {0x000006, 9}, {0x000134, 12},
+{0x00000C, 4}, {0x000007, 10}, {0x000007, 9}, {0x0006E1, 12},
+{0x00000E, 5}, {0x0000DA, 9}, {0x000022, 9}, {0x000364, 11},
+{0x00000F, 4}, {0x000006, 10}, {0x00000F, 9}, {0x000135, 12},
+{0x000014, 5}, {0x0000DD, 9}, {0x000004, 9}, {0x000015, 11},
+{0x00001A, 6}, {0x0001B3, 10}, {0x000005, 10}, {0x0006E3, 12},
+{0x00000C, 5}, {0x0000B9, 8}, {0x000004, 8}, {0x0000DB, 9},
+{0x00000E, 4}, {0x00000B, 10}, {0x000023, 9}, {0x0006CB, 12},
+{0x000005, 6}, {0x0001B1, 10}, {0x000001, 10}, {0x0006E0, 12},
+{0x000011, 5}, {0x0000DF, 9}, {0x00000E, 9}, {0x000373, 11},
+{0x000003, 5}, {0x0000B8, 8}, {0x000006, 8}, {0x000175, 9},
+{0x000015, 5}, {0x000174, 9}, {0x000027, 9}, {0x000372, 11},
+{0x000010, 5}, {0x0000BB, 8}, {0x000005, 8}, {0x0000DE, 9},
+{0x00000F, 5}, {0x000001, 9}, {0x000012, 8}, {0x000004, 10},
+{0x000002, 3}, {0x000016, 5}, {0x000009, 4}, {0x000001, 5},
+};
+
+static const uint32_t table_mb_non_intra3[128][2] = { /* 128 two-element rows; reachable as wmv2_inter_table[1], whose [n][1] and [n][0] columns are handed to init_vlc() in msmpeg4.c -- presumably {code, length}, TODO confirm */
+{0x0002A1, 10}, {0x005740, 15}, {0x01A0BF, 18}, {0x015D19, 17},
+{0x001514, 13}, {0x00461E, 15}, {0x015176, 17}, {0x015177, 17},
+{0x0011AD, 13}, {0x00682E, 16}, {0x0682F9, 20}, {0x03417D, 19},
+{0x001A36, 14}, {0x002A2D, 14}, {0x00D05E, 17}, {0x006824, 16},
+{0x001515, 13}, {0x00545C, 15}, {0x0230E9, 18}, {0x011AFA, 17},
+{0x0015D7, 13}, {0x005747, 15}, {0x008D79, 16}, {0x006825, 16},
+{0x002BA2, 14}, {0x00A8BA, 16}, {0x0235F6, 18}, {0x015D18, 17},
+{0x0011AE, 13}, {0x00346F, 15}, {0x008C3B, 16}, {0x00346E, 15},
+{0x000D1A, 13}, {0x00461F, 15}, {0x0682F8, 20}, {0x011875, 17},
+{0x002BA1, 14}, {0x008D61, 16}, {0x0235F7, 18}, {0x0230E8, 18},
+{0x001513, 13}, {0x008D7B, 16}, {0x011AF4, 17}, {0x011AF5, 17},
+{0x001185, 13}, {0x0046BF, 15}, {0x008D60, 16}, {0x008D7C, 16},
+{0x001512, 13}, {0x00461C, 15}, {0x00AE8D, 16}, {0x008D78, 16},
+{0x000D0E, 13}, {0x003413, 15}, {0x0046B1, 15}, {0x003416, 15},
+{0x000AEA, 12}, {0x002A2C, 14}, {0x005741, 15}, {0x002A2F, 14},
+{0x000158, 9}, {0x0008D2, 12}, {0x00054C, 11}, {0x000686, 12},
+{0x000000, 2}, {0x000069, 8}, {0x00006B, 8}, {0x00068C, 12},
+{0x000007, 3}, {0x00015E, 9}, {0x0002A3, 10}, {0x000AE9, 12},
+{0x000006, 3}, {0x000231, 10}, {0x0002B8, 10}, {0x001A08, 14},
+{0x000010, 5}, {0x0001A9, 10}, {0x000342, 11}, {0x000A88, 12},
+{0x000004, 4}, {0x0001A2, 10}, {0x0002A4, 10}, {0x001184, 13},
+{0x000012, 5}, {0x000232, 10}, {0x0002B2, 10}, {0x000680, 12},
+{0x00001B, 6}, {0x00046A, 11}, {0x00068E, 12}, {0x002359, 14},
+{0x000016, 5}, {0x00015F, 9}, {0x0002A0, 10}, {0x00054D, 11},
+{0x000005, 4}, {0x000233, 10}, {0x0002B9, 10}, {0x0015D6, 13},
+{0x000022, 6}, {0x000468, 11}, {0x000683, 12}, {0x001A0A, 14},
+{0x000013, 5}, {0x000236, 10}, {0x0002BB, 10}, {0x001186, 13},
+{0x000017, 5}, {0x0001AB, 10}, {0x0002A7, 10}, {0x0008D3, 12},
+{0x000014, 5}, {0x000237, 10}, {0x000460, 11}, {0x000D0F, 13},
+{0x000019, 6}, {0x0001AA, 10}, {0x0002B3, 10}, {0x000681, 12},
+{0x000018, 6}, {0x0001A8, 10}, {0x0002A5, 10}, {0x00068F, 12},
+{0x000007, 4}, {0x000055, 7}, {0x000047, 7}, {0x0000AD, 8},
+};
+
+static const uint32_t table_mb_non_intra4[128][2] = { /* 128 two-element rows; reachable as wmv2_inter_table[2], whose [n][1] and [n][0] columns are handed to init_vlc() in msmpeg4.c -- presumably {code, length}, TODO confirm */
+{0x0000D4, 8}, {0x0021C5, 14}, {0x00F18A, 16}, {0x00D5BC, 16},
+{0x000879, 12}, {0x00354D, 14}, {0x010E3F, 17}, {0x010F54, 17},
+{0x000866, 12}, {0x00356E, 14}, {0x010F55, 17}, {0x010E3E, 17},
+{0x0010CE, 13}, {0x003C84, 14}, {0x00D5BD, 16}, {0x00F18B, 16},
+{0x000868, 12}, {0x00438C, 15}, {0x0087AB, 16}, {0x00790B, 15},
+{0x000F10, 12}, {0x00433D, 15}, {0x006AD3, 15}, {0x00790A, 15},
+{0x001AA7, 13}, {0x0043D4, 15}, {0x00871E, 16}, {0x006ADF, 15},
+{0x000D7C, 12}, {0x003C94, 14}, {0x00438D, 15}, {0x006AD2, 15},
+{0x0006BC, 11}, {0x0021E9, 14}, {0x006ADA, 15}, {0x006A99, 15},
+{0x0010F7, 13}, {0x004389, 15}, {0x006ADB, 15}, {0x0078C4, 15},
+{0x000D56, 12}, {0x0035F7, 14}, {0x00438E, 15}, {0x006A98, 15},
+{0x000D52, 12}, {0x003C95, 14}, {0x004388, 15}, {0x00433C, 15},
+{0x000D54, 12}, {0x001E4B, 13}, {0x003C63, 14}, {0x003C83, 14},
+{0x000861, 12}, {0x0021EB, 14}, {0x00356C, 14}, {0x0035F6, 14},
+{0x000863, 12}, {0x00219F, 14}, {0x003568, 14}, {0x003C82, 14},
+{0x0001AE, 9}, {0x0010C0, 13}, {0x000F11, 12}, {0x001AFA, 13},
+{0x000000, 1}, {0x0000F0, 8}, {0x0001AD, 9}, {0x0010C1, 13},
+{0x00000A, 4}, {0x0003C5, 10}, {0x000789, 11}, {0x001AB5, 13},
+{0x000009, 4}, {0x000435, 11}, {0x000793, 11}, {0x001E40, 13},
+{0x00001D, 5}, {0x0003CB, 10}, {0x000878, 12}, {0x001AAF, 13},
+{0x00000B, 4}, {0x0003C7, 10}, {0x000791, 11}, {0x001AAB, 13},
+{0x00001F, 5}, {0x000436, 11}, {0x0006BF, 11}, {0x000F19, 12},
+{0x00003D, 6}, {0x000D51, 12}, {0x0010C4, 13}, {0x0021E8, 14},
+{0x000036, 6}, {0x000437, 11}, {0x0006AF, 11}, {0x0010C5, 13},
+{0x00000C, 4}, {0x000432, 11}, {0x000794, 11}, {0x001E30, 13},
+{0x000042, 7}, {0x000870, 12}, {0x000F24, 12}, {0x001E43, 13},
+{0x000020, 6}, {0x00043E, 11}, {0x000795, 11}, {0x001AAA, 13},
+{0x000037, 6}, {0x0006AC, 11}, {0x0006AE, 11}, {0x0010F6, 13},
+{0x000034, 6}, {0x00043A, 11}, {0x000D50, 12}, {0x001AAE, 13},
+{0x000039, 6}, {0x00043F, 11}, {0x00078D, 11}, {0x0010D2, 13},
+{0x000038, 6}, {0x00043B, 11}, {0x0006BD, 11}, {0x0010D3, 13},
+{0x000011, 5}, {0x0001AC, 9}, {0x0000F3, 8}, {0x000439, 11},
+};
+
+static const uint32_t (*wmv2_inter_table[WMV2_INTER_CBP_TABLE_COUNT])[2]={ /* array of pointers to 2-column rows; ff_msmpeg4_decode_init() in msmpeg4.c builds one mb_non_intra_vlc[] per entry */
+ table_mb_non_intra2,
+ table_mb_non_intra3,
+ table_mb_non_intra4,
+ table_mb_non_intra, /* index 3 == DEFAULT_INTER_INDEX (msmpeg4.c), i.e. the pre-existing table_mb_non_intra */
+};
+
+static const uint8_t wmv2_scantableA[64]={ /* declared with 64 entries but only 32 initializers follow; the remaining 32 are implicitly 0. Passed to ff_init_scantable() for abt_scantable[0] in wmv2.c */
+0x00, 0x01, 0x02, 0x08, 0x03, 0x09, 0x0A, 0x10,
+0x04, 0x0B, 0x11, 0x18, 0x12, 0x0C, 0x05, 0x13,
+0x19, 0x0D, 0x14, 0x1A, 0x1B, 0x06, 0x15, 0x1C,
+0x0E, 0x16, 0x1D, 0x07, 0x1E, 0x0F, 0x17, 0x1F,
+};
+
+static const uint8_t wmv2_scantableB[64]={ /* declared with 64 entries but only 32 initializers follow; the remaining 32 are implicitly 0. Passed to ff_init_scantable() for abt_scantable[1] in wmv2.c */
+0x00, 0x08, 0x01, 0x10, 0x09, 0x18, 0x11, 0x02,
+0x20, 0x0A, 0x19, 0x28, 0x12, 0x30, 0x21, 0x1A,
+0x38, 0x29, 0x22, 0x03, 0x31, 0x39, 0x0B, 0x2A,
+0x13, 0x32, 0x1B, 0x3A, 0x23, 0x2B, 0x33, 0x3B,
+};
diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c
index ad27ac5942..8c9ce7b936 100644
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c
@@ -473,3 +473,93 @@ void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block)
idct4col(dest + line_size + i, 2 * line_size, block + 8 + i);
}
}
+
+/* 8x4 & 4x8 WMV2 IDCT */
+#undef CN_SHIFT /* the five names #undef'd here are redefined just below for the 8x4 / 4x8 transforms */
+#undef C_SHIFT
+#undef C_FIX
+#undef C1
+#undef C2
+#define CN_SHIFT 12 /* number of fractional bits in the C1..C3 fixed-point constants */
+#define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5)) /* x * sqrt(2) scaled by 2^CN_SHIFT; the +0.5 rounds the positive constants used here */
+#define C1 C_FIX(0.6532814824)
+#define C2 C_FIX(0.2705980501)
+#define C3 C_FIX(0.5)
+#define C_SHIFT (4+1+12) /* right shift applied in idct4col_add() before the result is added to the pixel */
+/**
+ * 4-point column transform whose output is added to the destination.
+ *
+ * Reads the four coefficients col[0], col[8], col[16] and col[24], and
+ * updates the four bytes dest[0], dest[line_size], dest[2*line_size] and
+ * dest[3*line_size]. Each sum is mapped through cropTbl + MAX_NEG_CROP
+ * (presumably the pixel clipping table -- confirm against dsputil).
+ *
+ * @param dest      first destination byte of the column
+ * @param line_size distance between vertically adjacent destination bytes
+ * @param col       first coefficient of the column (stride 8)
+ */
+static inline void idct4col_add(UINT8 *dest, int line_size, const INT16 *col)
+{
+    const UINT8 *const clip = cropTbl + MAX_NEG_CROP;
+    const int rounding = 1 << (C_SHIFT - 1);
+    const int in0 = col[8*0];
+    const int in1 = col[8*1];
+    const int in2 = col[8*2];
+    const int in3 = col[8*3];
+    /* even part carries the rounding term, odd part does not */
+    const int even_sum  = (in0 + in2)*C3 + rounding;
+    const int even_diff = (in0 - in2)*C3 + rounding;
+    const int odd_outer = in1 * C1 + in3 * C2;
+    const int odd_inner = in1 * C2 - in3 * C1;
+    UINT8 *px = dest;
+
+    px[0] = clip[px[0] + ((even_sum + odd_outer) >> C_SHIFT)];
+    px += line_size;
+    px[0] = clip[px[0] + ((even_diff + odd_inner) >> C_SHIFT)];
+    px += line_size;
+    px[0] = clip[px[0] + ((even_diff - odd_inner) >> C_SHIFT)];
+    px += line_size;
+    px[0] = clip[px[0] + ((even_sum - odd_outer) >> C_SHIFT)];
+}
+
+#define RN_SHIFT 15 /* number of fractional bits in the R1..R3 fixed-point constants */
+#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5)) /* x * sqrt(2) scaled by 2^RN_SHIFT; the +0.5 rounds the positive constants used here */
+#define R1 R_FIX(0.6532814824)
+#define R2 R_FIX(0.2705980501)
+#define R3 R_FIX(0.5)
+#define R_SHIFT 11 /* right shift applied to every idct4row() output */
+/**
+ * In-place 4-point row transform.
+ *
+ * Reads row[0..3], combines them with the R1..R3 fixed-point constants and
+ * writes the four results, shifted right by R_SHIFT, back to row[0..3].
+ * Entries row[4..7] are neither read nor written.
+ *
+ * The results stay in the coefficient buffer, so no pixel clipping table
+ * is needed here (the previously declared, never-read cropTbl pointer has
+ * been dropped).
+ *
+ * @param row first coefficient of the row to transform
+ */
+static inline void idct4row(INT16 *row)
+{
+    int c0, c1, c2, c3, a0, a1, a2, a3;
+
+    a0 = row[0];
+    a1 = row[1];
+    a2 = row[2];
+    a3 = row[3];
+    /* even part carries the rounding term, odd part does not */
+    c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1));
+    c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1));
+    c1 = a1 * R1 + a3 * R2;
+    c3 = a1 * R2 - a3 * R1;
+    row[0]= (c0 + c1) >> R_SHIFT;
+    row[1]= (c2 + c3) >> R_SHIFT;
+    row[2]= (c2 - c3) >> R_SHIFT;
+    row[3]= (c0 - c1) >> R_SHIFT;
+}
+
+/**
+ * Transform an 8-wide, 4-high coefficient block and add it to dest.
+ *
+ * First runs idctRowCondDC() on each of the 4 rows (8 coefficients apart),
+ * then idct4col_add() on each of the 8 columns, which adds the result to
+ * the destination bytes.
+ *
+ * @param dest      top-left destination byte
+ * @param line_size distance between destination lines
+ * @param block     coefficients, rows stored 8 entries apart; modified in place
+ */
+void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block)
+{
+    int row, column;
+
+    /* horizontal pass: 8-point transform on every line */
+    for (row = 0; row < 4; row++)
+        idctRowCondDC(block + 8*row);
+
+    /* vertical pass: 4-point transform, accumulated into dest */
+    for (column = 0; column < 8; column++)
+        idct4col_add(dest + column, line_size, block + column);
+}
+
+/**
+ * Transform a 4-wide, 8-high coefficient block and add it to dest.
+ *
+ * First runs idct4row() on each of the 8 rows (8 coefficients apart),
+ * then idctSparseColAdd() on each of the first 4 columns, which adds the
+ * result to the destination bytes.
+ *
+ * @param dest      top-left destination byte
+ * @param line_size distance between destination lines
+ * @param block     coefficients, rows stored 8 entries apart; modified in place
+ */
+void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block)
+{
+    int row, column;
+
+    /* horizontal pass: 4-point transform on every line */
+    for (row = 0; row < 8; row++)
+        idct4row(block + 8*row);
+
+    /* vertical pass: 8-point transform, accumulated into dest */
+    for (column = 0; column < 4; column++)
+        idctSparseColAdd(dest + column, line_size, block + column);
+}
+
diff --git a/libavcodec/simple_idct.h b/libavcodec/simple_idct.h
index 6c6b4f0115..428c6072c8 100644
--- a/libavcodec/simple_idct.h
+++ b/libavcodec/simple_idct.h
@@ -26,3 +26,6 @@ void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, INT16 *block);
void simple_idct(short *block);
void simple_idct248_put(UINT8 *dest, int line_size, INT16 *block);
+
+void simple_idct84_add(UINT8 *dest, int line_size, INT16 *block); /* 8-point transform on 4 rows, then 4-point transform on 8 columns, added to dest */
+void simple_idct48_add(UINT8 *dest, int line_size, INT16 *block); /* 4-point transform on 8 rows, then 8-point transform on 4 columns, added to dest */
diff --git a/libavcodec/wmv2.c b/libavcodec/wmv2.c
new file mode 100644
index 0000000000..d25b7a5f17
--- /dev/null
+++ b/libavcodec/wmv2.c
@@ -0,0 +1,850 @@
+/*
+ * Copyright (c) 2002 The FFmpeg Project.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include "simple_idct.h"
+
+/* 2-bit code read by parse_mb_skip() that selects how skipped macroblocks are signalled */
+#define SKIP_TYPE_NONE 0 /* no macroblock is marked as skipped */
+#define SKIP_TYPE_MPEG 1 /* one bit per macroblock */
+#define SKIP_TYPE_ROW 2 /* one bit per row; if clear, one bit per macroblock of that row */
+#define SKIP_TYPE_COL 3 /* one bit per column; if clear, one bit per macroblock of that column */
+
+
+/* WMV2 codec state. The functions in this file cast MpegEncContext* to
+ * Wmv2Context*, so the MpegEncContext member has to stay first. */
+typedef struct Wmv2Context{
+ MpegEncContext s;
+ int j_type_bit; /* ext header: I picture headers carry a j_type bit */
+ int j_type; /* per I picture; non-zero pictures are rejected by the decoder */
+ int flag3; /* ext header bit; only stored and printed in this file */
+ int flag63; /* not referenced in the code visible here */
+ int abt_flag; /* ext header: P picture headers carry ABT signalling */
+ int abt_type; /* 0: 8x8 transform, 1: two 8x4, 2: two 4x8 (see wmv2_add_block) */
+ int abt_type_table[6]; /* abt_type recorded per block of the current macroblock */
+ int per_mb_abt; /* abt_type is signalled per macroblock rather than per picture */
+ int per_block_abt; /* abt_type is signalled per block of the current macroblock */
+ int mspel_bit; /* ext header: P picture headers carry the mspel bit */
+ int cbp_table_index; /* which inter cbp table is used, derived from qscale and a coded index */
+ int top_left_mv_flag; /* ext header: allows an explicit left/top MV predictor choice */
+ int per_mb_rl_bit; /* ext header: picture headers carry the per_mb_rl_table bit */
+ int skip_type; /* one of SKIP_TYPE_* */
+ int hshift; /* extra bit read after odd motion vectors in mspel pictures */
+
+ ScanTable abt_scantable[2]; /* indexed by abt_type-1 */
+ DCTELEM abt_block2[6][64] __align8; /* coefficients of the second half block when ABT is used */
+}Wmv2Context;
+
+/* Initialise the two ABT scan tables shared by the encoder and decoder. */
+static void wmv2_common_init(Wmv2Context * w){
+    MpegEncContext * const ctx = &w->s;
+    ScanTable * const tables = w->abt_scantable;
+
+    ff_init_scantable(ctx, tables + 0, wmv2_scantableA);
+    ff_init_scantable(ctx, tables + 1, wmv2_scantableB);
+}
+
+/**
+ * Write the extended header into avctx->extradata and record the
+ * chosen feature bits in the context.
+ *
+ * Layout: 5 bits frame rate, 11 bits bitrate/1024, six single-bit
+ * flags, 3 bits slice code. The slice height is derived from the code.
+ *
+ * @return 0
+ */
+static int encode_ext_header(Wmv2Context *w){
+    MpegEncContext * const s= &w->s;
+    PutBitContext pb;
+    int code;
+
+    init_put_bits(&pb, s->avctx->extradata, s->avctx->extradata_size, NULL, NULL);
+
+    /* clamp to the 5-bit field, like the bitrate is clamped to its 11 bits */
+    put_bits(&pb, 5, FFMIN(s->frame_rate / FRAME_RATE_BASE, 31)); //yes 29.97 -> 29
+    put_bits(&pb, 11, FFMIN(s->bit_rate/1024, 2047));
+
+    put_bits(&pb, 1, w->mspel_bit=1);
+    put_bits(&pb, 1, w->flag3=1);
+    put_bits(&pb, 1, w->abt_flag=1);
+    put_bits(&pb, 1, w->j_type_bit=1);
+    put_bits(&pb, 1, w->top_left_mv_flag=0);
+    put_bits(&pb, 1, w->per_mb_rl_bit=1);
+    put_bits(&pb, 3, code=1);
+
+    flush_put_bits(&pb);
+
+    s->slice_height = s->mb_height / code;
+
+    return 0;
+}
+
+/**
+ * Encoder init callback: set up the generic encoder, the ABT scan
+ * tables and the 4-byte extended header stored in avctx->extradata.
+ *
+ * @return 0 on success, -1 on failure (the generic encoder is closed
+ *         again and no extradata is left behind)
+ */
+static int wmv2_encode_init(AVCodecContext *avctx){
+    Wmv2Context * const w= avctx->priv_data;
+
+    if(MPV_encode_init(avctx) < 0)
+        return -1;
+
+    wmv2_common_init(w);
+
+    avctx->extradata_size= 4;
+    avctx->extradata= av_mallocz(avctx->extradata_size + 10);
+    /* encode_ext_header() writes through extradata, so it must exist */
+    if(!avctx->extradata || encode_ext_header(w) < 0){
+        avctx->extradata_size= 0;
+        av_freep(&avctx->extradata);
+        MPV_encode_end(avctx);
+        return -1;
+    }
+
+    return 0;
+}
+
+/**
+ * Encoder close callback: close the generic encoder and release the
+ * extended header allocated by wmv2_encode_init().
+ *
+ * The extradata is released even when MPV_encode_end() reports an error.
+ *
+ * @return 0 on success, -1 if MPV_encode_end() failed
+ */
+static int wmv2_encode_end(AVCodecContext *avctx){
+    const int ret= MPV_encode_end(avctx);
+
+    avctx->extradata_size= 0;
+    av_freep(&avctx->extradata);
+
+    return ret < 0 ? -1 : 0;
+}
+
+/**
+ * Write the WMV2 picture header of the current picture to s->pb and
+ * reset the per-picture coding state.
+ *
+ * @param s              encoder context (embedded in a Wmv2Context)
+ * @param picture_number not used by this function
+ * @return 0
+ */
+int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number)
+{
+    /* cbp table choice: one row per qscale range, indexed by the coded cbp_index */
+    static const int cbp_map[3][3]= {
+        {0,2,1}, /* qscale <= 10 */
+        {1,0,2}, /* qscale <= 20 */
+        {2,1,0}, /* qscale  > 20 */
+    };
+    Wmv2Context * const w= (Wmv2Context*)s;
+    const int intra_picture= (s->pict_type == I_TYPE);
+
+    put_bits(&s->pb, 1, s->pict_type - 1);
+    if(intra_picture)
+        put_bits(&s->pb, 7, 0);
+    put_bits(&s->pb, 5, s->qscale);
+
+    /* fixed choices of this encoder */
+    s->dc_table_index = 1;
+    s->mv_table_index = 1; /* only if P frame */
+    s->per_mb_rl_table = 0;
+    s->mspel= 0;
+    w->per_mb_abt=0;
+    w->abt_type=0;
+    w->j_type=0;
+
+    if(intra_picture){
+        if(w->j_type_bit)
+            put_bits(&s->pb, 1, w->j_type);
+
+        if(w->per_mb_rl_bit)
+            put_bits(&s->pb, 1, s->per_mb_rl_table);
+
+        if(!s->per_mb_rl_table){
+            code012(&s->pb, s->rl_chroma_table_index);
+            code012(&s->pb, s->rl_table_index);
+        }
+
+        put_bits(&s->pb, 1, s->dc_table_index);
+
+        s->inter_intra_pred= 0;
+        s->no_rounding = 1;
+    }else{
+        int cbp_index, range;
+
+        put_bits(&s->pb, 2, SKIP_TYPE_NONE);
+
+        code012(&s->pb, cbp_index=0);
+        if(s->qscale <= 10)
+            range= 0;
+        else if(s->qscale <= 20)
+            range= 1;
+        else
+            range= 2;
+        w->cbp_table_index= cbp_map[range][cbp_index];
+
+        if(w->mspel_bit)
+            put_bits(&s->pb, 1, s->mspel);
+
+        if(w->abt_flag){
+            /* the bit is stored inverted */
+            put_bits(&s->pb, 1, w->per_mb_abt^1);
+            if(!w->per_mb_abt)
+                code012(&s->pb, w->abt_type);
+        }
+
+        if(w->per_mb_rl_bit)
+            put_bits(&s->pb, 1, s->per_mb_rl_table);
+
+        if(!s->per_mb_rl_table){
+            code012(&s->pb, s->rl_table_index);
+            s->rl_chroma_table_index = s->rl_table_index;
+        }
+        put_bits(&s->pb, 1, s->dc_table_index);
+        put_bits(&s->pb, 1, s->mv_table_index);
+
+        s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE);
+        s->no_rounding ^= 1;
+    }
+
+    s->esc3_level_length= 0;
+    s->esc3_run_length= 0;
+
+    return 0;
+}
+
+// Nearly identical to the WMV1 macroblock writer, because this encoder does
+// not use the WMV2-only features. It is duplicated here in case someone
+// wants to add support for those features.
+/**
+ * Write one macroblock to s->pb.
+ *
+ * Inter macroblocks: coded block pattern (entries cbp + 64 of the inter
+ * table) followed by the motion vector difference against the
+ * h263_pred_motion() predictor.
+ * Intra macroblocks: coded block pattern (predicted for luma in I
+ * pictures, plain in P pictures), an AC prediction bit that is always 0,
+ * and the inter/intra prediction code when s->inter_intra_pred is set.
+ * All six blocks are then written with msmpeg4_encode_block().
+ *
+ * @param s        encoder context (embedded in a Wmv2Context)
+ * @param block    the six coefficient blocks of the macroblock
+ * @param motion_x horizontal motion vector, used for inter macroblocks
+ * @param motion_y vertical motion vector, used for inter macroblocks
+ */
+void ff_wmv2_encode_mb(MpegEncContext * s,
+ DCTELEM block[6][64],
+ int motion_x, int motion_y)
+{
+ Wmv2Context * const w= (Wmv2Context*)s;
+ int cbp, coded_cbp, i;
+ int pred_x, pred_y;
+ UINT8 *coded_block;
+
+ handle_slices(s);
+
+ if (!s->mb_intra) {
+ /* compute cbp */
+ set_stat(ST_INTER_MB);
+ cbp = 0;
+ for (i = 0; i < 6; i++) {
+ if (s->block_last_index[i] >= 0)
+ cbp |= 1 << (5 - i);
+ }
+
+ /* inter entries live in the upper half (index + 64) of the table */
+ put_bits(&s->pb,
+ wmv2_inter_table[w->cbp_table_index][cbp + 64][1],
+ wmv2_inter_table[w->cbp_table_index][cbp + 64][0]);
+
+ /* motion vector */
+ h263_pred_motion(s, 0, &pred_x, &pred_y);
+ msmpeg4_encode_motion(s, motion_x - pred_x,
+ motion_y - pred_y);
+ } else {
+ /* compute cbp */
+ cbp = 0;
+ coded_cbp = 0;
+ for (i = 0; i < 6; i++) {
+ int val, pred;
+ /* a block counts as coded only if it has a coefficient beyond index 0 */
+ val = (s->block_last_index[i] >= 1);
+ cbp |= val << (5 - i);
+ if (i < 4) {
+ /* predict value for close blocks only for luma */
+ pred = coded_block_pred(s, i, &coded_block);
+ *coded_block = val;
+ val = val ^ pred;
+ }
+ coded_cbp |= val << (5 - i);
+ }
+#if 0
+ if (coded_cbp)
+ printf("cbp=%x %x\n", cbp, coded_cbp);
+#endif
+
+ if (s->pict_type == I_TYPE) {
+ set_stat(ST_INTRA_MB);
+ put_bits(&s->pb,
+ table_mb_intra[coded_cbp][1], table_mb_intra[coded_cbp][0]);
+ } else {
+ /* intra macroblock in a P picture: unpredicted cbp, lower half of the inter table */
+ put_bits(&s->pb,
+ wmv2_inter_table[w->cbp_table_index][cbp][1],
+ wmv2_inter_table[w->cbp_table_index][cbp][0]);
+ }
+ set_stat(ST_INTRA_MB);
+ put_bits(&s->pb, 1, 0); /* no AC prediction yet */
+ if(s->inter_intra_pred){
+ s->h263_aic_dir=0;
+ put_bits(&s->pb, table_inter_intra[s->h263_aic_dir][1], table_inter_intra[s->h263_aic_dir][0]);
+ }
+ }
+
+ for (i = 0; i < 6; i++) {
+ msmpeg4_encode_block(s, block[i], i);
+ }
+}
+
+/**
+ * Read the 2-bit skip type and the skip map that follows it, filling
+ * s->mb_type[] with MB_TYPE_SKIPED or 0 for every macroblock.
+ */
+static void parse_mb_skip(Wmv2Context * w){
+    MpegEncContext * const s= &w->s;
+    const int cols= s->mb_width;
+    const int rows= s->mb_height;
+    int x, y;
+
+    w->skip_type= get_bits(&s->gb, 2);
+    switch(w->skip_type){
+    case SKIP_TYPE_NONE:
+        for(y=0; y<rows; y++)
+            for(x=0; x<cols; x++)
+                s->mb_type[y*cols + x]= 0;
+        break;
+    case SKIP_TYPE_MPEG:
+        for(y=0; y<rows; y++)
+            for(x=0; x<cols; x++)
+                s->mb_type[y*cols + x]= get_bits1(&s->gb) ? MB_TYPE_SKIPED : 0;
+        break;
+    case SKIP_TYPE_ROW:
+        for(y=0; y<rows; y++){
+            /* a set bit skips the whole row, otherwise one bit per macroblock follows */
+            const int whole_row= get_bits1(&s->gb);
+
+            for(x=0; x<cols; x++)
+                s->mb_type[y*cols + x]= (whole_row || get_bits1(&s->gb)) ? MB_TYPE_SKIPED : 0;
+        }
+        break;
+    case SKIP_TYPE_COL:
+        for(x=0; x<cols; x++){
+            /* a set bit skips the whole column, otherwise one bit per macroblock follows */
+            const int whole_col= get_bits1(&s->gb);
+
+            for(y=0; y<rows; y++)
+                s->mb_type[y*cols + x]= (whole_col || get_bits1(&s->gb)) ? MB_TYPE_SKIPED : 0;
+        }
+        break;
+    }
+}
+
+/**
+ * Parse the extended header stored in avctx->extradata.
+ *
+ * Layout: 5 bits frame rate, 11 bits bitrate/1024, six single-bit
+ * flags, 3 bits slice code. The slice height is derived from the code.
+ *
+ * @return 0 on success, -1 if the extradata is shorter than 4 bytes or
+ *         the slice code is 0
+ */
+static int decode_ext_header(Wmv2Context *w){
+    MpegEncContext * const s= &w->s;
+    GetBitContext gb;
+    int fps;
+    int code;
+
+    if(s->avctx->extradata_size<4) return -1;
+
+    init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size);
+
+    fps                = get_bits(&gb, 5);
+    s->bit_rate        = get_bits(&gb, 11)*1024;
+    w->mspel_bit       = get_bits1(&gb);
+    w->flag3           = get_bits1(&gb);
+    w->abt_flag        = get_bits1(&gb);
+    w->j_type_bit      = get_bits1(&gb);
+    w->top_left_mv_flag= get_bits1(&gb);
+    w->per_mb_rl_bit   = get_bits1(&gb);
+    code               = get_bits(&gb, 3);
+
+    /* code is used as a divisor below */
+    if(code==0) return -1;
+
+    s->slice_height = s->mb_height / code;
+
+    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
+        /* the third value is mspel_bit; it used to be mislabelled as qpbit */
+        printf("fps:%d, br:%d, mspel_bit:%d, abt_flag:%d, j_type_bit:%d, tl_mv_flag:%d, mbrl_bit:%d, code:%d, flag3:%d\n",
+               fps, s->bit_rate, w->mspel_bit, w->abt_flag, w->j_type_bit, w->top_left_mv_flag, w->per_mb_rl_bit, code, w->flag3);
+    }
+    return 0;
+}
+
+/**
+ * Parse the WMV2 picture header from s->gb.
+ *
+ * The extended header in the extradata is parsed when
+ * s->picture_number is 0. For P pictures the skip map is read as part
+ * of the header.
+ *
+ * @param s decoder context (embedded in a Wmv2Context)
+ * @return 0 on success, -1 if the quantizer is 0 or the picture is a
+ *         J-type picture (not supported)
+ */
+int ff_wmv2_decode_picture_header(MpegEncContext * s)
+{
+    Wmv2Context * const w= (Wmv2Context*)s;
+    int code, i;
+
+    if(s->picture_number==0)
+        decode_ext_header(w);
+
+    s->pict_type = get_bits(&s->gb, 1) + 1;
+    if(s->pict_type == I_TYPE){
+        code = get_bits(&s->gb, 7);
+        /* only report the unknown 7-bit field when picture debugging is requested */
+        if(s->avctx->debug&FF_DEBUG_PICT_INFO)
+            printf("I7:%X/\n", code);
+    }
+    s->qscale = get_bits(&s->gb, 5);
+    /* 0 is not a usable quantizer */
+    if(s->qscale <= 0)
+        return -1;
+
+    if (s->pict_type == I_TYPE) {
+        if(w->j_type_bit) w->j_type= get_bits1(&s->gb);
+        else              w->j_type= 0; //FIXME check
+
+        if(!w->j_type){
+            if(w->per_mb_rl_bit) s->per_mb_rl_table= get_bits1(&s->gb);
+            else                 s->per_mb_rl_table= 0;
+
+            if(!s->per_mb_rl_table){
+                s->rl_chroma_table_index = decode012(&s->gb);
+                s->rl_table_index = decode012(&s->gb);
+            }
+
+            s->dc_table_index = get_bits1(&s->gb);
+        }
+        s->inter_intra_pred= 0;
+        s->no_rounding = 1;
+        if(s->avctx->debug&FF_DEBUG_PICT_INFO){
+            printf("qscale:%d rlc:%d rl:%d dc:%d mbrl:%d j_type:%d \n",
+                   s->qscale,
+                   s->rl_chroma_table_index,
+                   s->rl_table_index,
+                   s->dc_table_index,
+                   s->per_mb_rl_table,
+                   w->j_type);
+        }
+    }else{
+        int cbp_index;
+        w->j_type=0;
+
+        parse_mb_skip(w);
+        /* the cbp table depends on the quantizer range and the coded index */
+        cbp_index= decode012(&s->gb);
+        if(s->qscale <= 10){
+            int map[3]= {0,2,1};
+            w->cbp_table_index= map[cbp_index];
+        }else if(s->qscale <= 20){
+            int map[3]= {1,0,2};
+            w->cbp_table_index= map[cbp_index];
+        }else{
+            int map[3]= {2,1,0};
+            w->cbp_table_index= map[cbp_index];
+        }
+
+        if(w->mspel_bit) s->mspel= get_bits1(&s->gb);
+        else             s->mspel= 0; //FIXME check
+
+        if(w->abt_flag){
+            /* the bit is stored inverted */
+            w->per_mb_abt= get_bits1(&s->gb)^1;
+            if(!w->per_mb_abt){
+                w->abt_type= decode012(&s->gb);
+            }
+        }
+
+        if(w->per_mb_rl_bit) s->per_mb_rl_table= get_bits1(&s->gb);
+        else                 s->per_mb_rl_table= 0;
+
+        if(!s->per_mb_rl_table){
+            s->rl_table_index = decode012(&s->gb);
+            s->rl_chroma_table_index = s->rl_table_index;
+        }
+
+        s->dc_table_index = get_bits1(&s->gb);
+        s->mv_table_index = get_bits1(&s->gb);
+
+        s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE);
+        s->no_rounding ^= 1;
+
+        if(s->avctx->debug&FF_DEBUG_PICT_INFO){
+            printf("rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d mspel:%d per_mb_abt:%d abt_type:%d cbp:%d ii:%d\n",
+                   s->rl_table_index,
+                   s->rl_chroma_table_index,
+                   s->dc_table_index,
+                   s->mv_table_index,
+                   s->per_mb_rl_table,
+                   s->qscale,
+                   s->mspel,
+                   w->per_mb_abt,
+                   w->abt_type,
+                   w->cbp_table_index,
+                   s->inter_intra_pred);
+        }
+    }
+    s->esc3_level_length= 0;
+    s->esc3_run_length= 0;
+
+    if(s->avctx->debug&FF_DEBUG_SKIP){
+        for(i=0; i<s->mb_num; i++){
+            if(i%s->mb_width==0) printf("\n");
+            printf("%d", s->mb_type[i]);
+        }
+    }
+    s->picture_number++; //FIXME ?
+
+    /* FIXME J-type pictures would need their own decoder */
+    if(w->j_type){
+        printf("J-type picture isnt supported\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Decoder init hook; intentionally empty, the context s is not touched. */
+void ff_wmv2_decode_init(MpegEncContext *s){
+}
+
+/**
+ * Decode a motion vector with msmpeg4_decode_motion() and, for odd
+ * vectors in mspel pictures, the hshift bit that follows it.
+ *
+ * @return 0 on success, -1 if the vector could not be decoded
+ */
+static inline int wmv2_decode_motion(Wmv2Context *w, int *mx_ptr, int *my_ptr){
+    MpegEncContext * const s= &w->s;
+
+    if(msmpeg4_decode_motion(s, mx_ptr, my_ptr) < 0)
+        return -1;
+
+    /* the extra bit is only present when a component is odd and mspel is on */
+    if(s->mspel && ((*mx_ptr | *my_ptr) & 1))
+        w->hshift= get_bits1(&s->gb);
+    else
+        w->hshift= 0;
+
+    return 0;
+}
+
+/**
+ * Compute the motion vector predictor for the current macroblock.
+ *
+ * When the bitstream carries an explicit choice (type bit), the left
+ * (0) or top (1) neighbour is used directly. Otherwise the predictor is
+ * the left neighbour on the first line of a slice and the median of
+ * left, top and top-right elsewhere.
+ *
+ * @param px receives the horizontal predictor
+ * @param py receives the vertical predictor
+ * @return pointer to the motion vector storage of the current position
+ */
+static int16_t *wmv2_pred_motion(Wmv2Context *w, int *px, int *py){
+    MpegEncContext * const s= &w->s;
+    const int stride= s->block_wrap[0];
+    const int pos= s->block_index[0];
+    INT16 * const cur= s->motion_val[pos];
+    INT16 * const left= s->motion_val[pos - 1];
+    INT16 * const top= s->motion_val[pos - stride];
+    INT16 * const top_right= s->motion_val[pos + 2 - stride];
+    const int diff= FFMAX(ABS(left[0] - top[0]), ABS(left[1] - top[1]));
+    int type= 2;
+
+    //FIXME top/left bit too if y=!0 && first_slice_line?
+    if(s->mb_x && s->mb_y && !s->mspel && w->top_left_mv_flag && diff >= 8)
+        type= get_bits1(&s->gb);
+
+    switch(type){
+    case 0:
+        *px= left[0];
+        *py= left[1];
+        break;
+    case 1:
+        *px= top[0];
+        *py= top[1];
+        break;
+    default:
+        if (s->first_slice_line) {
+            /* special case for first (slice) line */
+            *px = left[0];
+            *py = left[1];
+        } else {
+            *px = mid_pred(left[0], top[0], top_right[0]);
+            *py = mid_pred(left[1], top[1], top_right[1]);
+        }
+        break;
+    }
+
+    return cur;
+}
+
+/**
+ * Decode one inter block, handling the ABT (split transform) signalling.
+ *
+ * With abt_type 0 the block is decoded with the regular inter scan
+ * table. Otherwise a sub-cbp selects which of the two half blocks are
+ * coded; the first goes to block, the second to w->abt_block2[n].
+ *
+ * @param block destination coefficients
+ * @param n     block number within the macroblock
+ * @param cbp   non-zero if the block is coded
+ * @return 0 on success (msmpeg4_decode_block()'s result for abt_type 0),
+ *         -1 on a decoding error
+ */
+static inline int wmv2_decode_inter_block(Wmv2Context *w, DCTELEM *block, int n, int cbp){
+ MpegEncContext * const s= &w->s;
+ /* maps the decode012() code to a 2-bit mask: bit 0 first half, bit 1 second half */
+ static const int sub_cbp_table[3]= {2,3,1};
+ int sub_cbp;
+
+ /* uncoded block: note that abt_type_table[n] is left untouched here */
+ if(!cbp){
+ s->block_last_index[n] = -1;
+
+ return 0;
+ }
+
+ if(w->per_block_abt)
+ w->abt_type= decode012(&s->gb);
+#if 0
+ if(w->per_block_abt)
+ printf("B%d", w->abt_type);
+#endif
+ w->abt_type_table[n]= w->abt_type;
+
+ if(w->abt_type){
+// const uint8_t *scantable= w->abt_scantable[w->abt_type-1].permutated;
+ const uint8_t *scantable= w->abt_scantable[w->abt_type-1].scantable;
+// const uint8_t *scantable= w->abt_type-1 ? w->abt_scantable[1].permutated : w->abt_scantable[0].scantable;
+
+ sub_cbp= sub_cbp_table[ decode012(&s->gb) ];
+// printf("S%d", sub_cbp);
+
+ if(sub_cbp&1){
+ if (msmpeg4_decode_block(s, block, n, 1, scantable) < 0)
+ return -1;
+ }
+
+ if(sub_cbp&2){
+ if (msmpeg4_decode_block(s, w->abt_block2[n], n, 1, scantable) < 0)
+ return -1;
+ }
+ /* mark the block as coded regardless of what the half-block decodes stored */
+ s->block_last_index[n] = 63;
+
+ return 0;
+ }else{
+ return msmpeg4_decode_block(s, block, n, 1, s->inter_scantable.permutated);
+ }
+}
+
+/**
+ * Inverse transform block n of the current macroblock and add it to dst.
+ *
+ * Uncoded blocks (block_last_index[n] < 0) are skipped for every
+ * transform type: w->abt_type_table[n] is only updated for coded blocks
+ * and may still hold the type of an earlier macroblock.
+ *
+ * @param block1 coefficients of the block (first half block for ABT)
+ * @param dst    top-left destination pixel of the 8x8 block
+ * @param stride distance in bytes between destination lines
+ * @param n      block number within the macroblock
+ */
+static void wmv2_add_block(Wmv2Context *w, DCTELEM *block1, uint8_t *dst, int stride, int n){
+    MpegEncContext * const s= &w->s;
+
+    if (s->block_last_index[n] < 0)
+        return;
+
+    switch(w->abt_type_table[n]){
+    case 0:
+        s->idct_add (dst, stride, block1);
+        break;
+    case 1:
+        /* two 8x4 halves, upper then lower */
+        simple_idct84_add(dst           , stride, block1);
+        simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]);
+        /* the second half block is private to this context, clear it for the next use */
+        memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM));
+        break;
+    case 2:
+        /* two 4x8 halves, left then right */
+        simple_idct48_add(dst           , stride, block1);
+        simple_idct48_add(dst + 4       , stride, w->abt_block2[n]);
+        memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM));
+        break;
+    default:
+        fprintf(stderr, "internal error in WMV2 abt\n");
+    }
+}
+
+/**
+ * Add the residual of all six blocks of a macroblock to the picture.
+ * The chroma blocks are left out when CODEC_FLAG_GRAY is set.
+ *
+ * @param block1  the six coefficient blocks
+ * @param dest_y  top-left luma destination of the macroblock
+ * @param dest_cb top-left Cb destination
+ * @param dest_cr top-left Cr destination
+ */
+void ff_wmv2_add_mb(MpegEncContext *s, DCTELEM block1[6][64], uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr){
+    Wmv2Context * const w= (Wmv2Context*)s;
+    uint8_t * const lower_y= dest_y + 8*s->linesize;
+
+    /* luma: upper pair, then lower pair */
+    wmv2_add_block(w, block1[0], dest_y     , s->linesize, 0);
+    wmv2_add_block(w, block1[1], dest_y  + 8, s->linesize, 1);
+    wmv2_add_block(w, block1[2], lower_y    , s->linesize, 2);
+    wmv2_add_block(w, block1[3], lower_y + 8, s->linesize, 3);
+
+    if(s->flags&CODEC_FLAG_GRAY)
+        return;
+
+    /* chroma */
+    wmv2_add_block(w, block1[4], dest_cb, s->uvlinesize, 4);
+    wmv2_add_block(w, block1[5], dest_cr, s->uvlinesize, 5);
+}
+
+/**
+ * Motion compensation of one macroblock for mspel pictures.
+ *
+ * Luma is predicted as four 8x8 blocks with s->dsp.put_mspel_pixels_tab,
+ * chroma with the pix_op[1] functions. Source coordinates are clipped
+ * to the picture, and with CODEC_FLAG_EMU_EDGE the source is copied to
+ * s->edge_emu_buffer when it touches the picture border.
+ *
+ * @param dest_y      luma destination
+ * @param dest_cb     Cb destination
+ * @param dest_cr     Cr destination
+ * @param ref_picture the three reference planes
+ * @param pix_op      pixel functions used for chroma
+ * @param motion_x    horizontal motion vector
+ * @param motion_y    vertical motion vector
+ * @param h           block height, used for the edge test and (halved) for chroma
+ */
+void ff_mspel_motion(MpegEncContext *s,
+ UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
+ UINT8 **ref_picture, op_pixels_func (*pix_op)[4],
+ int motion_x, int motion_y, int h)
+{
+ Wmv2Context * const w= (Wmv2Context*)s;
+ UINT8 *ptr;
+ int dxy, offset, mx, my, src_x, src_y, v_edge_pos, linesize, uvlinesize;
+ int emu=0;
+
+ /* table index: low bit of each vector component, doubled, plus the hshift bit */
+ dxy = ((motion_y & 1) << 1) | (motion_x & 1);
+ dxy = 2*dxy + w->hshift;
+ src_x = s->mb_x * 16 + (motion_x >> 1);
+ src_y = s->mb_y * 16 + (motion_y >> 1);
+
+ /* WARNING: do no forget half pels */
+ v_edge_pos = s->v_edge_pos;
+ src_x = clip(src_x, -16, s->width);
+ src_y = clip(src_y, -16, s->height);
+ linesize = s->linesize;
+ uvlinesize = s->uvlinesize;
+ ptr = ref_picture[0] + (src_y * linesize) + src_x;
+
+ if(s->flags&CODEC_FLAG_EMU_EDGE){
+ if(src_x<1 || src_y<1 || src_x + 17 >= s->h_edge_pos
+ || src_y + h+1 >= v_edge_pos){
+ /* copy a 19x19 area starting one pixel up and left of the block */
+ ff_emulated_edge_mc(s, ptr - 1 - s->linesize, s->linesize, 19, 19,
+ src_x-1, src_y-1, s->h_edge_pos, s->v_edge_pos);
+ ptr= s->edge_emu_buffer + 1 + s->linesize;
+ emu=1;
+ }
+ }
+
+ /* luma, one call per 8x8 quadrant */
+ s->dsp.put_mspel_pixels_tab[dxy](dest_y , ptr , linesize);
+ s->dsp.put_mspel_pixels_tab[dxy](dest_y+8 , ptr+8 , linesize);
+ s->dsp.put_mspel_pixels_tab[dxy](dest_y +8*linesize, ptr +8*linesize, linesize);
+ s->dsp.put_mspel_pixels_tab[dxy](dest_y+8+8*linesize, ptr+8+8*linesize, linesize);
+
+ if(s->flags&CODEC_FLAG_GRAY) return;
+
+ /* derive the chroma vector and its half-pel index from the luma vector */
+ if (s->out_format == FMT_H263) {
+ dxy = 0;
+ if ((motion_x & 3) != 0)
+ dxy |= 1;
+ if ((motion_y & 3) != 0)
+ dxy |= 2;
+ mx = motion_x >> 2;
+ my = motion_y >> 2;
+ } else {
+ mx = motion_x / 2;
+ my = motion_y / 2;
+ dxy = ((my & 1) << 1) | (mx & 1);
+ mx >>= 1;
+ my >>= 1;
+ }
+
+ src_x = s->mb_x * 8 + mx;
+ src_y = s->mb_y * 8 + my;
+ /* drop the half-pel bit when the position was clipped to the far edge */
+ src_x = clip(src_x, -8, s->width >> 1);
+ if (src_x == (s->width >> 1))
+ dxy &= ~1;
+ src_y = clip(src_y, -8, s->height >> 1);
+ if (src_y == (s->height >> 1))
+ dxy &= ~2;
+ offset = (src_y * uvlinesize) + src_x;
+ ptr = ref_picture[1] + offset;
+ /* chroma reuses the edge decision made for luma */
+ if(emu){
+ ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9,
+ src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+ ptr= s->edge_emu_buffer;
+ }
+ pix_op[1][dxy](dest_cb, ptr, uvlinesize, h >> 1);
+
+ ptr = ref_picture[2] + offset;
+ if(emu){
+ ff_emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9,
+ src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+ ptr= s->edge_emu_buffer;
+ }
+ pix_op[1][dxy](dest_cr, ptr, uvlinesize, h >> 1);
+}
+
+
+/**
+ * Decode one macroblock from s->gb.
+ *
+ * In P pictures a macroblock flagged in s->mb_type[] as skipped is
+ * set up as a zero-motion 16x16 forward prediction without reading any
+ * bits. Otherwise the coded block pattern is read (predicted for luma
+ * in I pictures), followed by either the inter data (optional table and
+ * ABT selection, motion vector, six inter blocks) or the intra data
+ * (AC prediction bit, optional inter/intra direction, optional table
+ * selection, six intra blocks).
+ *
+ * @param s     decoder context (embedded in a Wmv2Context)
+ * @param block destination for the six coefficient blocks
+ * @return 0 on success, -1 on a bitstream error
+ */
+static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
+{
+ Wmv2Context * const w= (Wmv2Context*)s;
+ int cbp, code, i;
+ UINT8 *coded_val;
+
+ /* J-type pictures are not decoded here */
+ if(w->j_type) return 0;
+
+ s->error_status_table[s->mb_x + s->mb_y*s->mb_width]= 0;
+
+ if (s->pict_type == P_TYPE) {
+ if(s->mb_type[s->mb_y * s->mb_width + s->mb_x]&MB_TYPE_SKIPED){
+ /* skip mb */
+ s->mb_intra = 0;
+ for(i=0;i<6;i++)
+ s->block_last_index[i] = -1;
+ s->mv_dir = MV_DIR_FORWARD;
+ s->mv_type = MV_TYPE_16X16;
+ s->mv[0][0][0] = 0;
+ s->mv[0][0][1] = 0;
+ s->mb_skiped = 1;
+ return 0;
+ }
+
+ code = get_vlc2(&s->gb, mb_non_intra_vlc[w->cbp_table_index].table, MB_NON_INTRA_VLC_BITS, 3);
+ if (code < 0)
+ return -1;
+ /* bit 6 of the code is set for inter macroblocks, the low 6 bits are the cbp */
+ s->mb_intra = (~code & 0x40) >> 6;
+
+ cbp = code & 0x3f;
+ } else {
+ s->mb_intra = 1;
+ code = get_vlc2(&s->gb, mb_intra_vlc.table, MB_INTRA_VLC_BITS, 2);
+ if (code < 0){
+ fprintf(stderr, "II-cbp illegal at %d %d\n", s->mb_x, s->mb_y);
+ return -1;
+ }
+ /* predict coded block pattern */
+ cbp = 0;
+ for(i=0;i<6;i++) {
+ int val = ((code >> (5 - i)) & 1);
+ if (i < 4) {
+ int pred = coded_block_pred(s, i, &coded_val);
+ val = val ^ pred;
+ *coded_val = val;
+ }
+ cbp |= val << (5 - i);
+ }
+ }
+
+ if (!s->mb_intra) {
+ int mx, my;
+//printf("P at %d %d\n", s->mb_x, s->mb_y);
+ /* may read a predictor selection bit, so it has to run before the reads below */
+ wmv2_pred_motion(w, &mx, &my);
+
+ if(cbp){
+ if(s->per_mb_rl_table){
+ s->rl_table_index = decode012(&s->gb);
+ s->rl_chroma_table_index = s->rl_table_index;
+ }
+
+ if(w->abt_flag && w->per_mb_abt){
+ w->per_block_abt= get_bits1(&s->gb);
+ if(!w->per_block_abt)
+ w->abt_type= decode012(&s->gb);
+ }else
+ w->per_block_abt=0;
+ }
+
+ /* mx/my hold the predictor on entry and are passed on to msmpeg4_decode_motion() */
+ if (wmv2_decode_motion(w, &mx, &my) < 0)
+ return -1;
+
+ s->mv_dir = MV_DIR_FORWARD;
+ s->mv_type = MV_TYPE_16X16;
+ s->mv[0][0][0] = mx;
+ s->mv[0][0][1] = my;
+
+ for (i = 0; i < 6; i++) {
+ if (wmv2_decode_inter_block(w, block[i], i, (cbp >> (5 - i)) & 1) < 0)
+ {
+ fprintf(stderr,"\nerror while decoding inter block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
+ return -1;
+ }
+ }
+ } else {
+//if(s->pict_type==P_TYPE)
+// printf("%d%d ", s->inter_intra_pred, cbp);
+//printf("I at %d %d %d %06X\n", s->mb_x, s->mb_y, ((cbp&3)? 1 : 0) +((cbp&0x3C)? 2 : 0), show_bits(&s->gb, 24));
+ s->ac_pred = get_bits1(&s->gb);
+ if(s->inter_intra_pred){
+ s->h263_aic_dir= get_vlc2(&s->gb, inter_intra_vlc.table, INTER_INTRA_VLC_BITS, 1);
+// printf("%d%d %d %d/", s->ac_pred, s->h263_aic_dir, s->mb_x, s->mb_y);
+ }
+ if(s->per_mb_rl_table && cbp){
+ s->rl_table_index = decode012(&s->gb);
+ s->rl_chroma_table_index = s->rl_table_index;
+ }
+
+ for (i = 0; i < 6; i++) {
+ if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
+ {
+ fprintf(stderr,"\nerror while decoding intra block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
+ return -1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Decoder init callback: run the generic H.263-family init, then set
+ * up the WMV2 scan tables.
+ *
+ * @return 0 on success, -1 if ff_h263_decode_init() failed
+ */
+static int wmv2_decode_init(AVCodecContext *avctx){
+    Wmv2Context * const ctx= avctx->priv_data;
+    const int failed= ff_h263_decode_init(avctx) < 0;
+
+    if(failed)
+        return -1;
+
+    wmv2_common_init(ctx);
+    return 0;
+}
+
+/* WMV2 decoder registration: own init, the shared H.263-family close and
+ * frame decode functions, and no encode function. */
+AVCodec wmv2_decoder = {
+ "wmv2",
+ CODEC_TYPE_VIDEO,
+ CODEC_ID_WMV2,
+ sizeof(Wmv2Context),
+ wmv2_decode_init,
+ NULL,
+ ff_h263_decode_end,
+ ff_h263_decode_frame,
+ CODEC_CAP_DRAW_HORIZ_BAND | CODEC_CAP_DR1,
+};
+
+/* WMV2 encoder registration. The close callback is wmv2_encode_end() so
+ * that the extradata allocated by wmv2_encode_init() is released; it
+ * calls MPV_encode_end() itself. */
+AVCodec wmv2_encoder = {
+    "wmv2",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_WMV2,
+    sizeof(Wmv2Context),
+    wmv2_encode_init,
+    MPV_encode_picture,
+    wmv2_encode_end,
+};
+