aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec
diff options
context:
space:
mode:
author Arpi <arpi@thot.banki.hu> 2001-08-03 18:33:03 +0000
committer Arpi <arpi@thot.banki.hu> 2001-08-03 18:33:03 +0000
commit 4af7bcc1857e8abfa7ae9a8e3c54c93723219438 (patch)
tree 22217576fcce734d75eaac61ce399839ec168044 /libavcodec
parent 2d6d0c1d66accc7976325190f17dc93ebd9b665d (diff)
download ffmpeg-4af7bcc1857e8abfa7ae9a8e3c54c93723219438.tar.gz
MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2
Gives an average 13-20% MPEG decoding speedup on x86 systems. Originally committed as revision 30 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/dsputil.c 2
-rw-r--r-- libavcodec/dsputil.h 1
-rw-r--r-- libavcodec/i386/dsputil_mmx.c 28
-rw-r--r-- libavcodec/mpeg12data.h 3
-rw-r--r-- libavcodec/mpegvideo.c 4
-rw-r--r-- libavcodec/mpegvideo.h 3
6 files changed, 37 insertions, 4 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index b68ba8c521..a41f1bef65 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -21,6 +21,7 @@
#include "avcodec.h"
#include "dsputil.h"
+void (*ff_idct)(DCTELEM *block);
void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
@@ -363,6 +364,7 @@ void dsputil_init(void)
squareTbl[i] = (i - 256) * (i - 256);
}
+ ff_idct = j_rev_dct;
get_pixels = get_pixels_c;
put_pixels_clamped = put_pixels_clamped_c;
add_pixels_clamped = add_pixels_clamped_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 76f4475f82..ffbc395bad 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -25,6 +25,7 @@ void dsputil_init(void);
/* pixel ops : interface with DCT */
+extern void (*ff_idct)(DCTELEM *block);
extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index d4a07c3a77..d9028cf2b9 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -29,6 +29,16 @@ int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
int pix_abs16x16_xy2_mmx(UINT8 *blk1, UINT8 *blk2, int lx, int h);
+#ifdef USE_MMX_IDCT
+/* external functions, defined in libmpeg2 */
+void mmx_idct(DCTELEM *block);
+void mmxext_idct(DCTELEM *block);
+/* this should be in dsputil.h? -- A'rpi */
+extern UINT8 ff_alternate_horizontal_scan[64];
+extern UINT8 ff_alternate_vertical_scan[64];
+extern UINT8 zigzag_direct[64];
+#endif
+
/* pixel operations */
static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001;
static const unsigned long long int mm_wtwo __attribute__ ((aligned(8))) = 0x0002000200020002;
@@ -1039,5 +1049,23 @@ void dsputil_init_mmx(void)
sub_pixels_tab[1] = sub_pixels_x2_3dnow;
sub_pixels_tab[2] = sub_pixels_y2_3dnow;
}
+
+#ifdef USE_MMX_IDCT
+ /* use MMX / MMXEXT iDCT code from libmpeg2 */
+ //printf("LIBAVCODEC: Using MMX%s iDCT code\n",(mm_flags & MM_MMXEXT)?"EXT":"");
+ ff_idct = (mm_flags & MM_MMXEXT) ? mmxext_idct : mmx_idct;
+ /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
+ { int i,j;
+ for (i = 0; i < 64; i++) {
+ j = zigzag_direct[i];
+ zigzag_direct[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
+ j = ff_alternate_horizontal_scan[i];
+ ff_alternate_horizontal_scan[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
+ j = ff_alternate_vertical_scan[i];
+ ff_alternate_vertical_scan[i] = (j & 0x38) | ((j & 6) >> 1) | ((j & 1) << 2);
+ }
+ }
+#endif
+
}
}
diff --git a/libavcodec/mpeg12data.h b/libavcodec/mpeg12data.h
index 2b5a9c2e22..7dbf2b2208 100644
--- a/libavcodec/mpeg12data.h
+++ b/libavcodec/mpeg12data.h
@@ -331,7 +331,8 @@ static const UINT8 mbMotionVectorTable[17][2] = {
{ 0xc, 10 },
};
-const UINT8 zigzag_direct[64] = {
+//const
+UINT8 zigzag_direct[64] = {
0, 1, 8, 16, 9, 2, 3, 10,
17, 24, 32, 25, 18, 11, 4, 5,
12, 19, 26, 33, 40, 48, 41, 34,
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 75a0c83f1d..c046dd2315 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -634,7 +634,7 @@ static inline void put_dct(MpegEncContext *s,
{
if (!s->mpeg2)
s->dct_unquantize(s, block, i, s->qscale);
- j_rev_dct (block);
+ ff_idct (block);
put_pixels_clamped(block, dest, line_size);
}
@@ -645,7 +645,7 @@ static inline void add_dct(MpegEncContext *s,
if (s->block_last_index[i] >= 0) {
if (!s->mpeg2)
s->dct_unquantize(s, block, i, s->qscale);
- j_rev_dct (block);
+ ff_idct (block);
add_pixels_clamped(block, dest, line_size);
}
}
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 71f8b139cf..9f93073930 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -179,7 +179,8 @@ typedef struct MpegEncContext {
DCTELEM *block, int n, int qscale);
} MpegEncContext;
-extern const UINT8 zigzag_direct[64];
+//const
+extern UINT8 zigzag_direct[64];
int MPV_common_init(MpegEncContext *s);
void MPV_common_end(MpegEncContext *s);