aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2007-09-08 03:14:20 +0000
committer: Michael Niedermayer <michaelni@gmx.at> 2007-09-08 03:14:20 +0000
commit: a68ca08e3051858132cfb7fb4514ce10781d5cbf (patch)
tree: c590f7e6958252865242908b2054a35a368b5dfc /libavcodec
parent: 7ae94d525c9e7258a178a5a1e315c91088f72136 (diff)
download: ffmpeg-a68ca08e3051858132cfb7fb4514ce10781d5cbf.tar.gz
cleanup mc_block()
Perform the interpolation steps in such an order that halfpel interpolation could be done per picture. This also makes mc_block() match H.264 for the 1/4-pel cases, so that using the H.264 functions for some cases does not introduce a fantastic mess.
Originally committed as revision 10433 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/snow.c 185
1 file changed, 150 insertions, 35 deletions
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index 275c5bfab3..4f8d7cf088 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -2144,8 +2144,57 @@ static void decode_blocks(SnowContext *s){
}
static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
- int x, y;
+ const static uint8_t weight[64]={
+ 8,7,6,5,4,3,2,1,
+ 7,7,0,0,0,0,0,1,
+ 6,0,6,0,0,0,2,0,
+ 5,0,0,5,0,3,0,0,
+ 4,0,0,0,4,0,0,0,
+ 3,0,0,5,0,3,0,0,
+ 2,0,6,0,0,0,2,0,
+ 1,7,0,0,0,0,0,1,
+ };
+
+ const static uint8_t brane[256]={
+ 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
+ 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
+ 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
+ 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
+ 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
+ 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
+ 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
+ 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
+ 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
+ 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
+ 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
+ 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
+ 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
+ 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
+ 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
+ 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
+ };
+
+ const static uint8_t needs[16]={
+ 0,1,0,0,
+ 2,4,2,0,
+ 0,1,0,0,
+ 15
+ };
+
+ int x, y, b, r, l;
+ int16_t tmpIt [64*(32+HTAPS)];
+ uint8_t tmp2t[3][stride*(32+HTAPS)];
+ int16_t *tmpI= tmpIt;
+ uint8_t *tmp2= tmp2t[0];
+ uint8_t *hpel[11];
START_TIMER
+ assert(dx<16 && dy<16);
+ r= brane[dx + 16*dy]&15;
+ l= brane[dx + 16*dy]>>4;
+
+ b= needs[l] | needs[r];
+
+ if(b&5){
for(y=0; y < b_h+HTAPS-1; y++){
for(x=0; x < b_w; x++){
int a_2=src[x + HTAPS/2-5];
@@ -2170,36 +2219,33 @@ START_TIMER
// if(b_w==16) am= 8*(a1+a2);
- if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
- else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
-
- /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
+ tmpI[x]= am;
+ am= (am+16)>>5;
if(am&(~255)) am= ~(am>>31);
-
- tmp[x] = am;
-
-/* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
- else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
- else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
- else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
+ tmp2[x]= am;
}
- tmp += stride;
+ tmpI+= 64;
+ tmp2+= stride;
src += stride;
}
- tmp -= (b_h+HTAPS-1)*stride;
+ src -= stride*y;
+ }
+ src += HTAPS/2 - 1;
+ tmp2= tmp2t[1];
+ if(b&2){
for(y=0; y < b_h; y++){
- for(x=0; x < b_w; x++){
- int a_2=tmp[x + (HTAPS/2-5)*stride];
- int a_1=tmp[x + (HTAPS/2-4)*stride];
- int a0= tmp[x + (HTAPS/2-3)*stride];
- int a1= tmp[x + (HTAPS/2-2)*stride];
- int a2= tmp[x + (HTAPS/2-1)*stride];
- int a3= tmp[x + (HTAPS/2+0)*stride];
- int a4= tmp[x + (HTAPS/2+1)*stride];
- int a5= tmp[x + (HTAPS/2+2)*stride];
- int a6= tmp[x + (HTAPS/2+3)*stride];
- int a7= tmp[x + (HTAPS/2+4)*stride];
+ for(x=0; x < b_w+1; x++){
+ int a_2=src[x + (HTAPS/2-5)*stride];
+ int a_1=src[x + (HTAPS/2-4)*stride];
+ int a0= src[x + (HTAPS/2-3)*stride];
+ int a1= src[x + (HTAPS/2-2)*stride];
+ int a2= src[x + (HTAPS/2-1)*stride];
+ int a3= src[x + (HTAPS/2+0)*stride];
+ int a4= src[x + (HTAPS/2+1)*stride];
+ int a5= src[x + (HTAPS/2+2)*stride];
+ int a6= src[x + (HTAPS/2+3)*stride];
+ int a7= src[x + (HTAPS/2+4)*stride];
#if HTAPS==6
int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
#else
@@ -2211,19 +2257,88 @@ START_TIMER
// if(b_w==16) am= 8*(a1+a2);
- if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
- else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
-
+ am= (am + 16)>>5;
if(am&(~255)) am= ~(am>>31);
+ tmp2[x]= am;
+ }
+ src += stride;
+ tmp2+= stride;
+ }
+ src -= stride*y;
+ }
+ src += stride*(HTAPS/2 - 1);
+ tmp2= tmp2t[2];
+ tmpI= tmpIt;
+ if(b&4){
+ for(y=0; y < b_h; y++){
+ for(x=0; x < b_w; x++){
+ int a_2=tmpI[x + (HTAPS/2-5)*64];
+ int a_1=tmpI[x + (HTAPS/2-4)*64];
+ int a0= tmpI[x + (HTAPS/2-3)*64];
+ int a1= tmpI[x + (HTAPS/2-2)*64];
+ int a2= tmpI[x + (HTAPS/2-1)*64];
+ int a3= tmpI[x + (HTAPS/2+0)*64];
+ int a4= tmpI[x + (HTAPS/2+1)*64];
+ int a5= tmpI[x + (HTAPS/2+2)*64];
+ int a6= tmpI[x + (HTAPS/2+3)*64];
+ int a7= tmpI[x + (HTAPS/2+4)*64];
+#if HTAPS==6
+ int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
+#else
+ int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6);
+#endif
+ am= (am + 512)>>10;
+ if(am&(~255)) am= ~(am>>31);
+ tmp2[x]= am;
+ }
+ tmpI+= 64;
+ tmp2+= stride;
+ }
+ }
- dst[x] = am;
-/* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
- else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
- else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
- else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
+ hpel[ 0]= src;
+ hpel[ 1]= tmp2t[0] + stride*(HTAPS/2-1);
+ hpel[ 2]= src + 1;
+
+ hpel[ 4]= tmp2t[1];
+ hpel[ 5]= tmp2t[2];
+ hpel[ 6]= tmp2t[1] + 1;
+
+ hpel[ 8]= src + stride;
+ hpel[ 9]= hpel[1] + stride;
+ hpel[10]= hpel[8] + 1;
+
+ if(b==15){
+ uint8_t *src1= hpel[dx/8 + dy/8*4 ];
+ uint8_t *src2= hpel[dx/8 + dy/8*4+1];
+ uint8_t *src3= hpel[dx/8 + dy/8*4+4];
+ uint8_t *src4= hpel[dx/8 + dy/8*4+5];
+ dx&=7;
+ dy&=7;
+ for(y=0; y < b_h; y++){
+ for(x=0; x < b_w; x++){
+ dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
+ (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
+ }
+ src1+=stride;
+ src2+=stride;
+ src3+=stride;
+ src4+=stride;
+ dst +=stride;
+ }
+ }else{
+ uint8_t *src1= hpel[l];
+ uint8_t *src2= hpel[r];
+ int a= weight[((dx&7) + (8*(dy&7)))];
+ int b= 8-a;
+ for(y=0; y < b_h; y++){
+ for(x=0; x < b_w; x++){
+ dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
+ }
+ src1+=stride;
+ src2+=stride;
+ dst +=stride;
}
- dst += stride;
- tmp += stride;
}
STOP_TIMER("mc_block")
}