diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2006-10-07 11:15:10 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2006-10-07 11:15:10 +0000 |
commit | ec8f483ab59a893b033d1c304c2f1345a3a09034 (patch) | |
tree | 7f9c1a66c29e525352e2e0503449acc8fd09553c /libavcodec/cabac.h | |
parent | 21423ad9b7743f50ead7cccb107154913cec290f (diff) | |
download | ffmpeg-ec8f483ab59a893b033d1c304c2f1345a3a09034.tar.gz |
several x86 renorm_cabac_decoder_once optimizations
START/STOP_TIMER benchmarking code for them
please benchmark on P4 & athlon
(ill remove the benchmarking code and the always slower variants as soon as p4/athlon benchmarks have been posted or commited)
Originally committed as revision 6573 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/cabac.h')
-rw-r--r-- | libavcodec/cabac.h | 64 |
1 files changed, 63 insertions, 1 deletions
diff --git a/libavcodec/cabac.h b/libavcodec/cabac.h index ae94a5face..333b82a04e 100644 --- a/libavcodec/cabac.h +++ b/libavcodec/cabac.h @@ -295,15 +295,77 @@ static inline void renorm_cabac_decoder(CABACContext *c){ } static inline void renorm_cabac_decoder_once(CABACContext *c){ +#ifdef ARCH_X86 + int temp; +#if 0 + //P3:683 + asm( + "lea -0x20000(%0), %2 \n\t" + "shr $31, %2 \n\t" //FIXME 31->63 for x86-64 + "shl %%cl, %0 \n\t" + "shl %%cl, %1 \n\t" + : "+r"(c->range), "+r"(c->low), "+c"(temp) + ); +#elif 0 + //P3:680 + asm( + "cmp $0x20000, %0 \n\t" + "setb %%cl \n\t" //FIXME 31->63 for x86-64 + "shl %%cl, %0 \n\t" + "shl %%cl, %1 \n\t" + : "+r"(c->range), "+r"(c->low), "+c"(temp) + ); +#elif 1 + int temp2; + //P3:665 + asm( + "lea -0x20000(%0), %%eax \n\t" + "cdq \n\t" + "mov %0, %%eax \n\t" + "and %%edx, %0 \n\t" + "and %1, %%edx \n\t" + "add %%eax, %0 \n\t" + "add %%edx, %1 \n\t" + : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2) + ); +#elif 0 + int temp2; + //P3:673 + asm( + "cmp $0x20000, %0 \n\t" + "sbb %%edx, %%edx \n\t" + "mov %0, %%eax \n\t" + "and %%edx, %0 \n\t" + "and %1, %%edx \n\t" + "add %%eax, %0 \n\t" + "add %%edx, %1 \n\t" + : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2) + ); +#else + int temp2; + //P3:677 + asm( + "cmp $0x20000, %0 \n\t" + "lea (%0, %0), %%eax \n\t" + "lea (%1, %1), %%edx \n\t" + "cmovb %%eax, %0 \n\t" + "cmovb %%edx, %1 \n\t" + : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2) + ); +#endif +#else + //P3:675 int shift= (uint32_t)(c->range - (0x200 << CABAC_BITS))>>31; c->range<<= shift; c->low <<= shift; +#endif if(!(c->low & CABAC_MASK)) refill(c); } static int get_cabac(CABACContext *c, uint8_t * const state){ //FIXME gcc generates duplicate load/stores for c->low and c->range +START_TIMER int s = *state; int RangeLPS= c->lps_range[s][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1); int bit, lps_mask attribute_unused; @@ -342,7 +404,7 @@ static int get_cabac(CABACContext *c, uint8_t * const state){ if(!(c->low & CABAC_MASK)) refill2(c); #endif - +STOP_TIMER("get_cabac") return bit; } |