diff options
author | BERO <bero@geocities.co.jp> | 2003-05-14 15:12:13 +0000 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2003-05-14 15:12:13 +0000 |
commit | d4961b35236beb67785410473442f5923ac8a488 (patch) | |
tree | 7b8474ce797d6e8b33ef0b3e6c19ff5b532ea375 | |
parent | b82cdc727855fc11f0110c46c39eadd00009ebc0 (diff) | |
download | ffmpeg-d4961b35236beb67785410473442f5923ac8a488.tar.gz |
fastdiv patch by BERO <bero at geocities dot co dot jp>, with fixes & cleanup by me (the committer, Michael Niedermayer)
Originally committed as revision 1879 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/common.h | 19 | ||||
-rw-r--r-- | libavcodec/h263.c | 18 | ||||
-rw-r--r-- | libavcodec/i386/mpegvideo_mmx.c | 1 | ||||
-rw-r--r-- | libavcodec/msmpeg4.c | 10 |
4 files changed, 26 insertions, 22 deletions
diff --git a/libavcodec/common.h b/libavcodec/common.h index 1a8202d709..092026d78e 100644 --- a/libavcodec/common.h +++ b/libavcodec/common.h @@ -197,6 +197,25 @@ inline void dprintf(const char* fmt,...) {} #define FFMAX(a,b) ((a) > (b) ? (a) : (b)) #define FFMIN(a,b) ((a) > (b) ? (b) : (a)) +extern const uint32_t inverse[256]; + +#ifdef ARCH_X86 +# define FASTDIV(a,b) \ + ({\ + int ret,dmy;\ + asm volatile(\ + "mull %3"\ + :"=d"(ret),"=a"(dmy)\ + :"1"(a),"g"(inverse[b])\ + );\ + ret;\ + }) +#elif defined(CONFIG_FASTDIV) +# define FASTDIV(a,b) ((uint32_t)((((uint64_t)a)*inverse[b])>>32)) +#else +# define FASTDIV(a,b) ((a)/(b)) +#endif + #ifdef ARCH_X86 // avoid +32 for shift optimization (gcc should do that ...) static inline int32_t NEG_SSR32( int32_t a, int8_t s){ diff --git a/libavcodec/h263.c b/libavcodec/h263.c index f6f2efceea..a48f9a93bf 100644 --- a/libavcodec/h263.c +++ b/libavcodec/h263.c @@ -73,8 +73,6 @@ static void mpeg4_inv_pred_ac(MpegEncContext * s, DCTELEM *block, int n, static void mpeg4_decode_sprite_trajectory(MpegEncContext * s); static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr, int *dir_ptr); -extern uint32_t inverse[256]; - #ifdef CONFIG_ENCODERS static uint8_t uni_DCtab_lum_len[512]; static uint8_t uni_DCtab_chrom_len[512]; @@ -1823,7 +1821,6 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ { int a, b, c, wrap, pred, scale; uint16_t *dc_val; - int dummy; /* find prediction */ if (n < 4) { @@ -1859,16 +1856,7 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ *dir_ptr = 0; /* left */ } /* we assume pred is positive */ -#ifdef ARCH_X86 - asm volatile ( - "xorl %%edx, %%edx \n\t" - "mul %%ecx \n\t" - : "=d" (pred), "=a"(dummy) - : "a" (pred + (scale >> 1)), "c" (inverse[scale]) - ); -#else - pred = (pred + (scale >> 1)) / scale; -#endif + pred = FASTDIV((pred + (scale >> 1)), scale); /* prepare address for prediction update */ *dc_val_ptr = 
&dc_val[0]; @@ -3668,8 +3656,8 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block, /* DC coef */ if(s->partitioned_frame){ level = s->dc_val[0][ s->block_index[n] ]; - if(n<4) level= (level + (s->y_dc_scale>>1))/s->y_dc_scale; //FIXME optimizs - else level= (level + (s->c_dc_scale>>1))/s->c_dc_scale; + if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale); + else level= FASTDIV((level + (s->c_dc_scale>>1)), s->c_dc_scale); dc_pred_dir= (s->pred_dir_table[s->mb_x + s->mb_y*s->mb_stride]<<n)&32; }else{ level = mpeg4_decode_dc(s, n, &dc_pred_dir); diff --git a/libavcodec/i386/mpegvideo_mmx.c b/libavcodec/i386/mpegvideo_mmx.c index 8e452b4991..d2f477b7b3 100644 --- a/libavcodec/i386/mpegvideo_mmx.c +++ b/libavcodec/i386/mpegvideo_mmx.c @@ -26,7 +26,6 @@ extern uint8_t zigzag_direct_noperm[64]; extern uint16_t inv_zigzag_direct16[64]; -extern uint32_t inverse[256]; static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL; static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; diff --git a/libavcodec/msmpeg4.c b/libavcodec/msmpeg4.c index e4daec4e8f..093ee985ca 100644 --- a/libavcodec/msmpeg4.c +++ b/libavcodec/msmpeg4.c @@ -78,8 +78,6 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]); -extern uint32_t inverse[256]; - #ifdef DEBUG int intra_count = 0; @@ -699,7 +697,7 @@ static int get_dc(uint8_t *src, int stride, int scale) sum+=src[x + y*stride]; } } - return (sum + (scale>>1))/scale; + return FASTDIV((sum + (scale>>1)), scale); } /* dir = 0: left, dir = 1: top prediction */ @@ -763,9 +761,9 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n, b = (b + (8 >> 1)) / 8; c = (c + (8 >> 1)) / 8; } else { - a = (a + (scale >> 1)) / scale; - b = (b + (scale >> 1)) / scale; - c = 
(c + (scale >> 1)) / scale; + a = FASTDIV((a + (scale >> 1)), scale); + b = FASTDIV((b + (scale >> 1)), scale); + c = FASTDIV((c + (scale >> 1)), scale); } #endif /* XXX: WARNING: they did not choose the same test as MPEG4. This |