about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author BERO <bero@geocities.co.jp> 2003-05-14 15:12:13 +0000
committer Michael Niedermayer <michaelni@gmx.at> 2003-05-14 15:12:13 +0000
commit d4961b35236beb67785410473442f5923ac8a488 (patch)
tree 7b8474ce797d6e8b33ef0b3e6c19ff5b532ea375
parent b82cdc727855fc11f0110c46c39eadd00009ebc0 (diff)
download ffmpeg-d4961b35236beb67785410473442f5923ac8a488.tar.gz
fastdiv patch by (BERO <bero at geocities dot co dot jp>) with fixes & cleanup by me
Originally committed as revision 1879 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- libavcodec/common.h 19
-rw-r--r-- libavcodec/h263.c 18
-rw-r--r-- libavcodec/i386/mpegvideo_mmx.c 1
-rw-r--r-- libavcodec/msmpeg4.c 10
4 files changed, 26 insertions, 22 deletions
diff --git a/libavcodec/common.h b/libavcodec/common.h
index 1a8202d709..092026d78e 100644
--- a/libavcodec/common.h
+++ b/libavcodec/common.h
@@ -197,6 +197,25 @@ inline void dprintf(const char* fmt,...) {}
#define FFMAX(a,b) ((a) > (b) ? (a) : (b))
#define FFMIN(a,b) ((a) > (b) ? (b) : (a))
+extern const uint32_t inverse[256];
+
+#ifdef ARCH_X86
+# define FASTDIV(a,b) \
+ ({\
+ int ret,dmy;\
+ asm volatile(\
+ "mull %3"\
+ :"=d"(ret),"=a"(dmy)\
+ :"1"(a),"g"(inverse[b])\
+ );\
+ ret;\
+ })
+#elif defined(CONFIG_FASTDIV)
+# define FASTDIV(a,b) ((uint32_t)((((uint64_t)a)*inverse[b])>>32))
+#else
+# define FASTDIV(a,b) ((a)/(b))
+#endif
+
#ifdef ARCH_X86
// avoid +32 for shift optimization (gcc should do that ...)
static inline int32_t NEG_SSR32( int32_t a, int8_t s){
diff --git a/libavcodec/h263.c b/libavcodec/h263.c
index f6f2efceea..a48f9a93bf 100644
--- a/libavcodec/h263.c
+++ b/libavcodec/h263.c
@@ -73,8 +73,6 @@ static void mpeg4_inv_pred_ac(MpegEncContext * s, DCTELEM *block, int n,
static void mpeg4_decode_sprite_trajectory(MpegEncContext * s);
static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr, int *dir_ptr);
-extern uint32_t inverse[256];
-
#ifdef CONFIG_ENCODERS
static uint8_t uni_DCtab_lum_len[512];
static uint8_t uni_DCtab_chrom_len[512];
@@ -1823,7 +1821,6 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_
{
int a, b, c, wrap, pred, scale;
uint16_t *dc_val;
- int dummy;
/* find prediction */
if (n < 4) {
@@ -1859,16 +1856,7 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_
*dir_ptr = 0; /* left */
}
/* we assume pred is positive */
-#ifdef ARCH_X86
- asm volatile (
- "xorl %%edx, %%edx \n\t"
- "mul %%ecx \n\t"
- : "=d" (pred), "=a"(dummy)
- : "a" (pred + (scale >> 1)), "c" (inverse[scale])
- );
-#else
- pred = (pred + (scale >> 1)) / scale;
-#endif
+ pred = FASTDIV((pred + (scale >> 1)), scale);
/* prepare address for prediction update */
*dc_val_ptr = &dc_val[0];
@@ -3668,8 +3656,8 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
/* DC coef */
if(s->partitioned_frame){
level = s->dc_val[0][ s->block_index[n] ];
- if(n<4) level= (level + (s->y_dc_scale>>1))/s->y_dc_scale; //FIXME optimizs
- else level= (level + (s->c_dc_scale>>1))/s->c_dc_scale;
+ if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale);
+ else level= FASTDIV((level + (s->c_dc_scale>>1)), s->c_dc_scale);
dc_pred_dir= (s->pred_dir_table[s->mb_x + s->mb_y*s->mb_stride]<<n)&32;
}else{
level = mpeg4_decode_dc(s, n, &dc_pred_dir);
diff --git a/libavcodec/i386/mpegvideo_mmx.c b/libavcodec/i386/mpegvideo_mmx.c
index 8e452b4991..d2f477b7b3 100644
--- a/libavcodec/i386/mpegvideo_mmx.c
+++ b/libavcodec/i386/mpegvideo_mmx.c
@@ -26,7 +26,6 @@
extern uint8_t zigzag_direct_noperm[64];
extern uint16_t inv_zigzag_direct16[64];
-extern uint32_t inverse[256];
static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL;
static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL;
diff --git a/libavcodec/msmpeg4.c b/libavcodec/msmpeg4.c
index e4daec4e8f..093ee985ca 100644
--- a/libavcodec/msmpeg4.c
+++ b/libavcodec/msmpeg4.c
@@ -78,8 +78,6 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
-extern uint32_t inverse[256];
-
#ifdef DEBUG
int intra_count = 0;
@@ -699,7 +697,7 @@ static int get_dc(uint8_t *src, int stride, int scale)
sum+=src[x + y*stride];
}
}
- return (sum + (scale>>1))/scale;
+ return FASTDIV((sum + (scale>>1)), scale);
}
/* dir = 0: left, dir = 1: top prediction */
@@ -763,9 +761,9 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
b = (b + (8 >> 1)) / 8;
c = (c + (8 >> 1)) / 8;
} else {
- a = (a + (scale >> 1)) / scale;
- b = (b + (scale >> 1)) / scale;
- c = (c + (scale >> 1)) / scale;
+ a = FASTDIV((a + (scale >> 1)), scale);
+ b = FASTDIV((b + (scale >> 1)), scale);
+ c = FASTDIV((c + (scale >> 1)), scale);
}
#endif
/* XXX: WARNING: they did not choose the same test as MPEG4. This