aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2004-08-22 17:16:03 +0000
committer: Michael Niedermayer <michaelni@gmx.at> 2004-08-22 17:16:03 +0000
commit: 26efc54e4a6811f29d07d875185190175cb513e6 (patch)
tree: ae92d157d94487dfdc2cee4ea3cbb7c1f99d1144
parent: c931e6086012d955e2fd0c896ff1fdb9099b4204 (diff)
download: ffmpeg-26efc54e4a6811f29d07d875185190175cb513e6.tar.gz
4x4 SSE compare function
wavelet based compare functions; make epzs_motion_search() more flexible so it can be used for a wider range of block sizes; make get_penalty_factor() independent of MpegEncContext. Originally committed as revision 3410 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- libavcodec/avcodec.h 2
-rw-r--r-- libavcodec/dsputil.c 127
-rw-r--r-- libavcodec/dsputil.h 25
-rw-r--r-- libavcodec/motion_est.c 41
-rw-r--r-- libavcodec/motion_est_template.c 24
5 files changed, 175 insertions, 44 deletions
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 64218a099c..e2da0f85a2 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -1262,6 +1262,8 @@ typedef struct AVCodecContext {
#define FF_CMP_VSAD 8
#define FF_CMP_VSSE 9
#define FF_CMP_NSSE 10
+#define FF_CMP_W53 11
+#define FF_CMP_W97 12
#define FF_CMP_CHROMA 256
/**
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index c4b627b683..0fa95fffe8 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -219,6 +219,23 @@ static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
}
}
+static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) // sums table lookups of the per-pixel differences over a 4-pixel-wide, h-row block; v is not used
+{
+ int s, i;
+ uint32_t *sq = squareTbl + 256; // biased pointer so negative differences are valid indices; presumably sq[d] == d*d (squareTbl is defined outside this hunk)
+
+ s = 0;
+ for (i = 0; i < h; i++) {
+ s += sq[pix1[0] - pix2[0]]; // difference of two uint8_t values, i.e. in -255..255
+ s += sq[pix1[1] - pix2[1]];
+ s += sq[pix1[2] - pix2[2]];
+ s += sq[pix1[3] - pix2[3]];
+ pix1 += line_size; // step both blocks down one row
+ pix2 += line_size;
+ }
+ return s;
+}
+
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
int s, i;
@@ -270,6 +287,103 @@ static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
return s;
}
+
+static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){ // wavelet compare: runs ff_spatial_dwt() on the w x h pixel difference and returns the sum of absolute values of the result, shifted right by 2; v is not used
+ int s, i, j;
+ const int dec_count= w==8 ? 3 : 4; // 3 decomposition levels for 8-wide blocks, 4 for any other width
+ int tmp[16*16]; // difference buffer addressed with a fixed row stride of 16; NOTE(review): nothing here checks w <= 16 or h <= 16
+#if 0
+ int level, ori;
+ static const int scale[2][2][4][4]={ // disabled per-subband weights, indexed [type][dec_count-3][level][ori] by the disabled loop further down
+ {
+ {
+ //8x8 dec=3
+ {268, 239, 239, 213},
+ { 0, 224, 224, 152},
+ { 0, 135, 135, 110},
+ },{
+ //16x16 dec=4
+ {344, 310, 310, 280},
+ { 0, 320, 320, 228},
+ { 0, 175, 175, 136},
+ { 0, 129, 129, 102},
+ }
+ },{
+ {//FIXME 5/3
+ //8x8 dec=3
+ {275, 245, 245, 218},
+ { 0, 230, 230, 156},
+ { 0, 138, 138, 113},
+ },{
+ //16x16 dec=4
+ {352, 317, 317, 286},
+ { 0, 328, 328, 233},
+ { 0, 180, 180, 140},
+ { 0, 132, 132, 105},
+ }
+ }
+ };
+#endif
+
+ for (i = 0; i < h; i++) { // fill tmp with the pixel differences, four columns per iteration
+ for (j = 0; j < w; j+=4) {
+ tmp[16*i+j+0] = (pix1[j+0] - pix2[j+0])<<4; // difference shifted left by 4; NOTE(review): left-shifting a negative int is undefined behavior in ISO C
+ tmp[16*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
+ tmp[16*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
+ tmp[16*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
+ }
+ pix1 += line_size;
+ pix2 += line_size;
+ }
+ ff_spatial_dwt(tmp, w, h, 16, type, dec_count); // presumably transforms tmp in place (16 is the row stride used above); tmp is read back below
+
+ s=0;
+#if 0
+ for(level=0; level<dec_count; level++){ // disabled: weighted sum of the coefficients using the scale table above
+ for(ori= level ? 1 : 0; ori<4; ori++){
+ int sx= (ori&1) ? 1<<level: 0;
+ int stride= 16<<(dec_count-level);
+ int sy= (ori&2) ? stride>>1 : 0;
+ int size= 1<<level;
+
+ for(i=0; i<size; i++){
+ for(j=0; j<size; j++){
+ int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
+ s += ABS(v);
+ }
+ }
+ }
+ }
+#endif
+ for (i = 0; i < h; i++) { // active path: unweighted sum of absolute values over the whole w x h area
+ for (j = 0; j < w; j+=4) {
+ s+= ABS(tmp[16*i+j+0]);
+ s+= ABS(tmp[16*i+j+1]);
+ s+= ABS(tmp[16*i+j+2]);
+ s+= ABS(tmp[16*i+j+3]);
+ }
+ }
+ assert(s>=0); // presumably a guard against the int accumulator having overflowed
+
+ return s>>2; // final score is the accumulated sum shifted right by 2
+}
+
+static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ // me_cmp_func-style wrapper: w_c() with width 8 and type 1 (presumably the 5/3 wavelet, going by the name)
+ return w_c(v, pix1, pix2, line_size, 8, h, 1);
+}
+
+static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ // me_cmp_func-style wrapper: w_c() with width 8 and type 0 (presumably the 9/7 wavelet, going by the name)
+ return w_c(v, pix1, pix2, line_size, 8, h, 0);
+}
+
+static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ // me_cmp_func-style wrapper: w_c() with width 16 and type 1 (presumably the 5/3 wavelet, going by the name)
+ return w_c(v, pix1, pix2, line_size, 16, h, 1);
+}
+
+static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ // me_cmp_func-style wrapper: w_c() with width 16 and type 0 (presumably the 9/7 wavelet, going by the name)
+ return w_c(v, pix1, pix2, line_size, 16, h, 0);
+}
+
static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
{
int i;
@@ -2733,6 +2847,12 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
case FF_CMP_NSSE:
cmp[i]= c->nsse[i];
break;
+ case FF_CMP_W53:
+ cmp[i]= c->w53[i];
+ break;
+ case FF_CMP_W97:
+ cmp[i]= c->w97[i];
+ break;
default:
av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
}
@@ -3359,6 +3479,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->sad[1]= pix_abs8_c;
c->sse[0]= sse16_c;
c->sse[1]= sse8_c;
+ c->sse[2]= sse4_c;
SET_CMP_FUNC(quant_psnr)
SET_CMP_FUNC(rd)
SET_CMP_FUNC(bit)
@@ -3368,7 +3489,11 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->vsse[4]= vsse_intra16_c;
c->nsse[0]= nsse16_c;
c->nsse[1]= nsse8_c;
-
+ c->w53[0]= w53_16_c;
+ c->w53[1]= w53_8_c;
+ c->w97[0]= w97_16_c;
+ c->w97[1]= w97_8_c;
+
c->add_bytes= add_bytes_c;
c->diff_bytes= diff_bytes_c;
c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 2ec3dc50c1..494c6f61c5 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -163,6 +163,8 @@ typedef struct DSPContext {
me_cmp_func vsad[5];
me_cmp_func vsse[5];
me_cmp_func nsse[5];
+ me_cmp_func w53[5];
+ me_cmp_func w97[5];
me_cmp_func me_pre_cmp[5];
me_cmp_func me_cmp[5];
@@ -351,6 +353,29 @@ static inline uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1);
}
+static inline int get_penalty_factor(int lambda, int lambda2, int type){ // returns the penalty factor for compare type `type`, computed from lambda or lambda2 passed in directly (no MpegEncContext needed)
+ switch(type&0xFF){ // only the low 8 bits select the compare function; higher bits such as FF_CMP_CHROMA (256) are masked off
+ default: // unrecognized types share the SAD factor
+ case FF_CMP_SAD:
+ return lambda>>FF_LAMBDA_SHIFT;
+ case FF_CMP_DCT:
+ return (3*lambda)>>(FF_LAMBDA_SHIFT+1); // i.e. 1.5 * lambda before the FF_LAMBDA_SHIFT scaling
+ case FF_CMP_W53:
+ return (4*lambda)>>(FF_LAMBDA_SHIFT);
+ case FF_CMP_W97:
+ return (2*lambda)>>(FF_LAMBDA_SHIFT);
+ case FF_CMP_SATD:
+ return (2*lambda)>>FF_LAMBDA_SHIFT;
+ case FF_CMP_RD:
+ case FF_CMP_PSNR:
+ case FF_CMP_SSE:
+ case FF_CMP_NSSE:
+ return lambda2>>FF_LAMBDA_SHIFT; // these four types are scaled from lambda2 instead of lambda
+ case FF_CMP_BIT:
+ return 1; // constant factor, independent of both lambdas
+ }
+}
+
/**
* Empty mmx state.
* this must be called between any dsp function and float/double code.
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index 13f7deb8d8..9239503373 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -219,25 +219,6 @@ static always_inline int cmp(MpegEncContext *s, const int x, const int y, const
#include "motion_est_template.c"
-static inline int get_penalty_factor(MpegEncContext *s, int type){
- switch(type&0xFF){
- default:
- case FF_CMP_SAD:
- return s->lambda>>FF_LAMBDA_SHIFT;
- case FF_CMP_DCT:
- return (3*s->lambda)>>(FF_LAMBDA_SHIFT+1);
- case FF_CMP_SATD:
- return (2*s->lambda)>>FF_LAMBDA_SHIFT;
- case FF_CMP_RD:
- case FF_CMP_PSNR:
- case FF_CMP_SSE:
- case FF_CMP_NSSE:
- return s->lambda2>>FF_LAMBDA_SHIFT;
- case FF_CMP_BIT:
- return 1;
- }
-}
-
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
return 0;
}
@@ -1161,9 +1142,9 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
assert(s->linesize == c->stride);
assert(s->uvlinesize == c->uvstride);
- c->penalty_factor = get_penalty_factor(s, c->avctx->me_cmp);
- c->sub_penalty_factor= get_penalty_factor(s, c->avctx->me_sub_cmp);
- c->mb_penalty_factor = get_penalty_factor(s, c->avctx->mb_cmp);
+ c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
+ c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
+ c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
get_limits(s, 16*mb_x, 16*mb_y);
@@ -1256,7 +1237,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
}
}
- dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift);
+ dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);
break;
}
@@ -1424,7 +1405,7 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
assert(s->quarter_sample==0 || s->quarter_sample==1);
- c->pre_penalty_factor = get_penalty_factor(s, c->avctx->me_pre_cmp);
+ c->pre_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_pre_cmp);
c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
get_limits(s, 16*mb_x, 16*mb_y);
@@ -1457,7 +1438,7 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
c->pred_y = P_MEDIAN[1];
}
- dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift);
+ dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);
s->p_mv_table[xy][0] = mx<<shift;
s->p_mv_table[xy][1] = my<<shift;
@@ -1477,9 +1458,9 @@ static int ff_estimate_motion_b(MpegEncContext * s,
uint8_t * const mv_penalty= c->mv_penalty[f_code] + MAX_MV;
int mv_scale;
- c->penalty_factor = get_penalty_factor(s, c->avctx->me_cmp);
- c->sub_penalty_factor= get_penalty_factor(s, c->avctx->me_sub_cmp);
- c->mb_penalty_factor = get_penalty_factor(s, c->avctx->mb_cmp);
+ c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
+ c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
+ c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
c->current_mv_penalty= mv_penalty;
get_limits(s, 16*mb_x, 16*mb_y);
@@ -1540,7 +1521,7 @@ static int ff_estimate_motion_b(MpegEncContext * s,
mv_scale= ((s->pb_time - s->pp_time)<<16) / (s->pp_time<<shift);
}
- dmin = epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale);
+ dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale, 0, 16);
break;
}
@@ -1731,7 +1712,7 @@ static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
}
- dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift));
+ dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift), 0, 16);
if(c->sub_flags&FLAG_QPEL)
dmin = qpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
else
diff --git a/libavcodec/motion_est_template.c b/libavcodec/motion_est_template.c
index b5ee885730..db7c2a4cbc 100644
--- a/libavcodec/motion_est_template.c
+++ b/libavcodec/motion_est_template.c
@@ -851,15 +851,13 @@ static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
- int ref_mv_scale, int flags)
+ int ref_mv_scale, int flags, int size, int h)
{
MotionEstContext * const c= &s->me;
int best[2]={0, 0};
int d, dmin;
int map_generation;
const int penalty_factor= c->penalty_factor;
- const int size=0;
- const int h=16;
const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
me_cmp_func cmpf, chroma_cmpf;
@@ -872,6 +870,7 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx
map_generation= update_map_generation(c);
+ assert(cmpf);
dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
map[0]= map_generation;
score_map[0]= dmin;
@@ -882,7 +881,7 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
}else{
- if(dmin<256 && ( P_LEFT[0] |P_LEFT[1]
+ if(dmin<h*h && ( P_LEFT[0] |P_LEFT[1]
|P_TOP[0] |P_TOP[1]
|P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
*mx_ptr= 0;
@@ -891,7 +890,7 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx
return dmin;
}
CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
- if(dmin>256*2){
+ if(dmin>h*h*2){
CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
@@ -899,7 +898,7 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx
CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
}
}
- if(dmin>256*4){
+ if(dmin>h*h*4){
if(c->pre_pass){
CHECK_CLIPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
(last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
@@ -948,19 +947,18 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx
}
//this function is dedicated to the braindamaged gcc
-static inline int epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
+inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr, // dispatcher around epzs_motion_search_internal(); NOTE(review): now non-static `inline` - C99 and GNU89 inline rules differ on whether an external definition is emitted, confirm linkage if called from other files
int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
- int ref_mv_scale)
+ int ref_mv_scale, int size, int h)
{
MotionEstContext * const c= &s->me;
//FIXME convert other functions in the same way if faster
- switch(c->flags){
- case 0:
- return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0);
+ if(c->flags==0 && h==16 && size==0){ // common case: pass literal 0, 0, 16 so the always_inline callee sees compile-time constants (presumably to get a specialized copy)
+ return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
// case FLAG_QPEL:
// return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
- default:
- return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags);
+ }else{ // every other combination: forward the runtime flags, size and h unchanged
+ return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
}
}