diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2007-09-08 14:51:13 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2007-09-08 14:51:13 +0000 |
commit | 7d7f57d93932684896e87ee0859086fabe6529e6 (patch) | |
tree | d74d67ddb25d8834f9b210150f0d4a7f108df3ce /libavcodec/snow.c | |
parent | 4770a97aae4846904f7d4fd2ea2a74a51a85d787 (diff) | |
download | ffmpeg-7d7f57d93932684896e87ee0859086fabe6529e6.tar.gz |
store halfpel filter coefficients in the header as well as the
flag for diagonal interpolation
the primary reason for this change is that previously MC up to 1/4 pel
matched H.264 exactly and that increases the risk of stumbling over
patents
secondly this allows 0.10 db or more quality gain by choosing a longer
filter and the filter could also be chosen optimally for each frame
though that of course would cause speed loss at the decoder and encoder
side ...
Originally committed as revision 10436 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/snow.c')
-rw-r--r-- | libavcodec/snow.c | 152 |
1 files changed, 114 insertions, 38 deletions
diff --git a/libavcodec/snow.c b/libavcodec/snow.c index 212064b036..3091c1697c 100644 --- a/libavcodec/snow.c +++ b/libavcodec/snow.c @@ -394,7 +394,7 @@ static const BlockNode null_block= { //FIXME add border maybe #define LOG2_MB_SIZE 4 #define MB_SIZE (1<<LOG2_MB_SIZE) #define ENCODER_EXTRA_BITS 4 -#define HTAPS 6 +#define HTAPS 8 typedef struct x_and_coeff{ int16_t x; @@ -421,6 +421,15 @@ typedef struct Plane{ int width; int height; SubBand band[MAX_DECOMPOSITIONS][4]; + + int htaps; + int8_t hcoeff[HTAPS/2]; + int diag_mc; + int fast_mc; + + int last_htaps; + int8_t last_hcoeff[HTAPS/2]; + int last_diag_mc; }Plane; typedef struct SnowContext{ @@ -2143,7 +2152,7 @@ static void decode_blocks(SnowContext *s){ } } -static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ +static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){ const static uint8_t weight[64]={ 8,7,6,5,4,3,2,1, 7,7,0,0,0,0,0,1, @@ -2193,11 +2202,12 @@ START_TIMER l= brane[dx + 16*dy]>>4; b= needs[l] | needs[r]; + if(p && !p->diag_mc) + b= 15; if(b&5){ for(y=0; y < b_h+HTAPS-1; y++){ for(x=0; x < b_w; x++){ - int a_2=src[x + HTAPS/2-5]; int a_1=src[x + HTAPS/2-4]; int a0= src[x + HTAPS/2-3]; int a1= src[x + HTAPS/2-2]; @@ -2206,15 +2216,17 @@ START_TIMER int a4= src[x + HTAPS/2+1]; int a5= src[x + HTAPS/2+2]; int a6= src[x + HTAPS/2+3]; - int a7= src[x + HTAPS/2+4]; -#if HTAPS==6 - int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); -#else - int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6); -#endif + int am=0; + if(!p || p->fast_mc){ + am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); + tmpI[x]= am; + am= (am+16)>>5; + }else{ + am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6); + tmpI[x]= am; + am= (am+32)>>6; + } - tmpI[x]= am; - am= (am+16)>>5; if(am&(~255)) am= ~(am>>31); tmp2[x]= am; } @@ -2230,7 +2242,6 @@ START_TIMER if(b&2){ for(y=0; y < b_h; y++){ for(x=0; x < b_w+1; x++){ - int a_2=src[x + (HTAPS/2-5)*stride]; int a_1=src[x + (HTAPS/2-4)*stride]; int a0= src[x + (HTAPS/2-3)*stride]; int a1= src[x + (HTAPS/2-2)*stride]; @@ -2239,14 +2250,12 @@ START_TIMER int a4= src[x + (HTAPS/2+1)*stride]; int a5= src[x + (HTAPS/2+2)*stride]; int a6= src[x + (HTAPS/2+3)*stride]; - int a7= src[x + (HTAPS/2+4)*stride]; -#if HTAPS==6 - int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); -#else - int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6); -#endif + int am=0; + if(!p || p->fast_mc) + am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5; + else + am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6; - am= (am + 16)>>5; if(am&(~255)) am= ~(am>>31); tmp2[x]= am; } @@ -2261,7 +2270,6 @@ START_TIMER if(b&4){ for(y=0; y < b_h; y++){ for(x=0; x < b_w; x++){ - int a_2=tmpI[x + (HTAPS/2-5)*64]; int a_1=tmpI[x + (HTAPS/2-4)*64]; int a0= tmpI[x + (HTAPS/2-3)*64]; int a1= tmpI[x + (HTAPS/2-2)*64]; @@ -2270,13 +2278,11 @@ START_TIMER int a4= tmpI[x + (HTAPS/2+1)*64]; int a5= tmpI[x + (HTAPS/2+2)*64]; int a6= tmpI[x + (HTAPS/2+3)*64]; - int a7= tmpI[x + (HTAPS/2+4)*64]; -#if HTAPS==6 - int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); -#else - int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6); -#endif - am= (am + 512)>>10; + int am=0; + if(!p || p->fast_mc) + am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10; + else + am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12; if(am&(~255)) am= ~(am>>31); tmp2[x]= am; } @@ -2336,7 +2342,7 @@ STOP_TIMER("mc_block") static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ uint8_t tmp[stride*(b_w+HTAPS-1)];\ assert(h==b_w);\ - mc_block(dst, src-(HTAPS/2-1)-(HTAPS/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\ + mc_block(NULL, dst, src-(HTAPS/2-1)-(HTAPS/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\ } mca( 0, 0,16) @@ -2407,23 +2413,23 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, i // assert(!(b_w&(b_w-1))); assert(b_w>1 && b_h>1); assert(tab_index>=0 && tab_index<4 || b_w==32); - if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || HTAPS != 6) - mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy); + if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc ) + mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy); else if(b_w==32){ int y; for(y=0; y<b_h; y+=16){ - s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride); - s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride); + s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride); + s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride); } }else if(b_w==b_h) - s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride); + s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride); else if(b_w==2*b_h){ - s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride); - s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride); + s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride); + s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride); }else{ assert(2*b_w==b_h); - s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride); - s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride); + s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride); + s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride); } } } @@ -3514,7 +3520,7 @@ static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int } static void encode_header(SnowContext *s){ - int plane_index, level, orientation; + int plane_index, level, orientation, i; uint8_t kstate[32]; memset(kstate, MID_STATE, sizeof(kstate)); @@ -3527,6 +3533,12 @@ static void encode_header(SnowContext *s){ s->last_qbias= s->last_mv_scale= s->last_block_max_depth= 0; + for(plane_index=0; plane_index<2; plane_index++){ + Plane *p= &s->plane[plane_index]; + p->last_htaps=0; + p->last_diag_mc=0; + memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff)); + } } if(s->keyframe){ put_symbol(&s->c, s->header_state, s->version, 0); @@ -3550,6 +3562,32 @@ static void encode_header(SnowContext *s){ } } } + + if(!s->keyframe){ + int update_mc=0; + for(plane_index=0; plane_index<2; plane_index++){ + Plane *p= &s->plane[plane_index]; + update_mc |= p->last_htaps != p->htaps; + update_mc |= p->last_diag_mc != p->diag_mc; + update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); + } + if(!s->always_reset) + put_rac(&s->c, s->header_state, update_mc); + if(update_mc){ + for(plane_index=0; plane_index<2; plane_index++){ + Plane *p= &s->plane[plane_index]; + put_rac(&s->c, s->header_state, p->diag_mc); + put_symbol(&s->c, s->header_state, p->htaps/2-1, 0); + for(i= p->htaps/2; i; i--) + put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0); + + p->last_diag_mc= p->diag_mc; + p->last_htaps= p->htaps; + memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); + } + } + } + put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1); put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1); put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1); @@ -3608,6 +3646,28 @@ static int decode_header(SnowContext *s){ } } + if(!s->keyframe){ + if(s->always_reset || get_rac(&s->c, s->header_state)){ + for(plane_index=0; plane_index<2; plane_index++){ + int htaps, i, sum=0, absum=0; + Plane *p= &s->plane[plane_index]; + p->diag_mc= get_rac(&s->c, s->header_state); + htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2; + if((unsigned)htaps > HTAPS || htaps==0) + return -1; + p->htaps= htaps; + for(i= htaps/2; i; i--){ + p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1)); + sum += p->hcoeff[i]; + } + p->hcoeff[0]= 32-sum; + } + s->plane[2].diag_mc= s->plane[1].diag_mc; + s->plane[2].htaps = s->plane[1].htaps; + memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff)); + } + } + s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1); if(s->spatial_decomposition_type > 1){ av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); @@ -3715,6 +3775,14 @@ static int common_init(AVCodecContext *avctx){ } s->plane[plane_index].width = w; s->plane[plane_index].height= h; + + s->plane[plane_index].diag_mc= 1; + s->plane[plane_index].htaps= 6; + s->plane[plane_index].hcoeff[0]= 40; + s->plane[plane_index].hcoeff[1]= -10; + s->plane[plane_index].hcoeff[2]= 2; + s->plane[plane_index].fast_mc= 1; + //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h); for(level=s->spatial_decomposition_count-1; level>=0; level--){ for(orientation=level ? 1 : 0; orientation<4; orientation++){ @@ -4354,6 +4422,14 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P decode_header(s); + + for(plane_index=0; plane_index<3; plane_index++){ + Plane *p= &s->plane[plane_index]; + p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40 + && p->hcoeff[1]==-10 + && p->hcoeff[2]==2; + } + if(!s->block) alloc_blocks(s); frame_start(s); |