aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2010-01-11 02:52:50 +0000
committerMichael Niedermayer <michaelni@gmx.at>2010-01-11 02:52:50 +0000
commit2c8077621b6466da205ba26fd20a9c906bb71893 (patch)
treedc35867cdde9a219aed6817898b25ada35da5213
parent233a8b3d6bb19fd80dd7760389051fcde766f06c (diff)
downloadffmpeg-2c8077621b6466da205ba26fd20a9c906bb71893.tar.gz
Optimize ff_snow_horizontal_compose97i.
This makes the 9/7 C wavelet at the decoder side 22% faster. The old code is changed to match the new in terms of the order of operations (which also makes it slightly faster). Originally committed as revision 21132 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r--libavcodec/snow.c34
1 files changed, 30 insertions, 4 deletions
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index a9aa2eb368..53c7deba52 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -1120,10 +1120,36 @@ void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
IDWTELEM temp[width];
const int w2= (width+1)>>1;
- inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
- inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
- inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
- inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
+#if 0 //maybe more understandable but slower
+ inv_lift (temp , b , b +w2, 2, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
+ inv_lift (temp+1 , b +w2, temp , 2, 1, 2, width, W_CM, W_CO, W_CS, 1, 1);
+
+ inv_liftS(b , temp , temp+1 , 2, 2, 2, width, W_BM, W_BO, W_BS, 0, 1);
+ inv_lift (b+1 , temp+1 , b , 2, 2, 2, width, W_AM, W_AO, W_AS, 1, 0);
+#else
+ int x;
+ temp[0] = b[0] - ((3*b[w2]+2)>>2);
+ for(x=1; x<(width>>1); x++){
+ temp[2*x ] = b[x ] - ((3*(b [x+w2-1] + b[x+w2])+4)>>3);
+ temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
+ }
+ if(width&1){
+ temp[2*x ] = b[x ] - ((3*b [x+w2-1]+2)>>2);
+ temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
+ }else
+ temp[2*x-1] = b[x+w2-1] - 2*temp[2*x-2];
+
+ b[0] = temp[0] + ((2*temp[0] + temp[1]+4)>>3);
+ for(x=2; x<width-1; x+=2){
+ b[x ] = temp[x ] + ((4*temp[x ] + temp[x-1] + temp[x+1]+8)>>4);
+ b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1);
+ }
+ if(width&1){
+ b[x ] = temp[x ] + ((2*temp[x ] + temp[x-1]+4)>>3);
+ b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1);
+ }else
+ b[x-1] = temp[x-1] + 3*b [x-2];
+#endif
}
static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){