aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2006-10-07 11:15:10 +0000
committerMichael Niedermayer <michaelni@gmx.at>2006-10-07 11:15:10 +0000
commitec8f483ab59a893b033d1c304c2f1345a3a09034 (patch)
tree7f9c1a66c29e525352e2e0503449acc8fd09553c
parent21423ad9b7743f50ead7cccb107154913cec290f (diff)
downloadffmpeg-ec8f483ab59a893b033d1c304c2f1345a3a09034.tar.gz
several x86 renorm_cabac_decoder_once optimizations
START/STOP_TIMER benchmarking code for them; please benchmark on P4 & Athlon (I'll remove the benchmarking code and the always-slower variants as soon as P4/Athlon benchmarks have been posted or committed). Originally committed as revision 6573 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r--libavcodec/cabac.h64
1 files changed, 63 insertions, 1 deletions
diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h
index ae94a5face..333b82a04e 100644
--- a/libavcodec/cabac.h
+++ b/libavcodec/cabac.h
@@ -295,15 +295,77 @@ static inline void renorm_cabac_decoder(CABACContext *c){
}
/**
 * Renormalizes the CABAC decoder state by at most one bit.
 *
 * When c->range is below the threshold, both c->range and c->low are doubled;
 * otherwise both are left unchanged. Afterwards refill(c) is called if c->low
 * has no bits set in CABAC_MASK.
 *
 * With ARCH_X86 defined, one of five inline-asm variants (selected by the
 * constant #if/#elif conditions below) replaces the plain C shift code.
 * All asm variants hard-code the threshold as 0x20000, whereas the C fallback
 * uses (0x200 << CABAC_BITS); the two agree only when CABAC_BITS is 8.
 * NOTE(review): CABAC_BITS is defined outside this hunk -- confirm its value.
 *
 * NOTE(review): temp/temp2 are uninitialized scratch values bound with "+"
 * (read-write) constraints only to reserve ecx/eax/edx. Early-clobber
 * outputs ("=&c", "=&a", "=&d") look like they would express the same thing
 * without reading indeterminate values -- confirm before changing.
 *
 * @param c decoder context; c->range and c->low are updated in place
 */
static inline void renorm_cabac_decoder_once(CABACContext *c){
+#ifdef ARCH_X86
+ int temp; // scratch register operand for the asm variants (never initialized)
+#if 0
+ //P3:683 (presumably a timing figure measured on a Pentium III -- confirm)
+ asm(
+ "lea -0x20000(%0), %2 \n\t" // ecx = range - 0x20000
+ "shr $31, %2 \n\t" //FIXME 31->63 for x86-64 (ecx = sign bit of the difference: 1 or 0)
+ "shl %%cl, %0 \n\t" // range <<= cl
+ "shl %%cl, %1 \n\t" // low <<= cl
+ : "+r"(c->range), "+r"(c->low), "+c"(temp)
+ );
+#elif 0
+ //P3:680
+ asm(
+ "cmp $0x20000, %0 \n\t" // compare range with the threshold
+ "setb %%cl \n\t" //FIXME 31->63 for x86-64 (NOTE(review): no 31 appears in this variant; FIXME looks copied from the one above). cl = 1 if range is below 0x20000 (unsigned), else 0
+ "shl %%cl, %0 \n\t" // range <<= cl
+ "shl %%cl, %1 \n\t" // low <<= cl
+ : "+r"(c->range), "+r"(c->low), "+c"(temp)
+ );
+#elif 1
+ int temp2; // second scratch operand, pinned to edx
+ //P3:665 (this is the variant currently enabled)
+ asm(
+ "lea -0x20000(%0), %%eax \n\t" // eax = range - 0x20000
+ "cdq \n\t" // edx = sign extension of eax: all ones if eax is negative, else 0
+ "mov %0, %%eax \n\t" // eax = range
+ "and %%edx, %0 \n\t" // range &= mask
+ "and %1, %%edx \n\t" // edx = low & mask
+ "add %%eax, %0 \n\t" // range = (range & mask) + range, i.e. doubled when mask is all ones
+ "add %%edx, %1 \n\t" // low += (low & mask), i.e. doubled when mask is all ones
+ : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
+ );
+#elif 0
+ int temp2; // second scratch operand, pinned to edx
+ //P3:673
+ asm(
+ "cmp $0x20000, %0 \n\t" // sets carry when range is below 0x20000 (unsigned)
+ "sbb %%edx, %%edx \n\t" // edx = all ones if carry is set, else 0
+ "mov %0, %%eax \n\t" // eax = range
+ "and %%edx, %0 \n\t" // range &= mask
+ "and %1, %%edx \n\t" // edx = low & mask
+ "add %%eax, %0 \n\t" // range = (range & mask) + range
+ "add %%edx, %1 \n\t" // low += (low & mask)
+ : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
+ );
+#else
+ int temp2; // second scratch operand, pinned to edx
+ //P3:677
+ asm(
+ "cmp $0x20000, %0 \n\t" // sets carry when range is below 0x20000 (unsigned)
+ "lea (%0, %0), %%eax \n\t" // eax = 2*range (lea leaves the flags from cmp untouched)
+ "lea (%1, %1), %%edx \n\t" // edx = 2*low
+ "cmovb %%eax, %0 \n\t" // range = eax only if carry is set
+ "cmovb %%edx, %1 \n\t" // low = edx only if carry is set
+ : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
+ );
+#endif
+#else
+ //P3:675
int shift= (uint32_t)(c->range - (0x200 << CABAC_BITS))>>31; // bit 31 of the difference: 1 when range is below the threshold, else 0
c->range<<= shift;
c->low <<= shift;
+#endif
if(!(c->low & CABAC_MASK)) // no bits of low set within CABAC_MASK
refill(c);
}
static int get_cabac(CABACContext *c, uint8_t * const state){
//FIXME gcc generates duplicate load/stores for c->low and c->range
+START_TIMER
int s = *state;
int RangeLPS= c->lps_range[s][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1);
int bit, lps_mask attribute_unused;
@@ -342,7 +404,7 @@ static int get_cabac(CABACContext *c, uint8_t * const state){
if(!(c->low & CABAC_MASK))
refill2(c);
#endif
-
+STOP_TIMER("get_cabac")
return bit;
}