aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/alpha/dsputil_alpha.c
diff options
context:
space:
mode:
author Falk Hüffner <mellum@users.sourceforge.net> 2002-07-03 01:09:44 +0000
committer Falk Hüffner <mellum@users.sourceforge.net> 2002-07-03 01:09:44 +0000
commit dde3f77dbc30a6564ca8ad373b7517f282983bdb (patch)
tree c47e6c36560d2b907aa095ca33ea551b1e7fbb89 /libavcodec/alpha/dsputil_alpha.c
parent 3530320dbf753d2896cd9a35353622268e28b53e (diff)
download ffmpeg-dde3f77dbc30a6564ca8ad373b7517f282983bdb.tar.gz
Use updated motion compensation routines.
Originally committed as revision 713 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/alpha/dsputil_alpha.c')
-rw-r--r-- libavcodec/alpha/dsputil_alpha.c | 213
1 files changed, 114 insertions, 99 deletions
diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index 942eef780c..db11d529fb 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -105,132 +105,137 @@ void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
}
#endif
-/* Average 8 unsigned bytes in parallel: (b1 + b2) >> 1
- Since the immediate result could be greater than 255, we do the
- shift first. The result is too low by one if the bytes were both
- odd, so we need to add (l1 & l2) & BYTE_VEC(0x01). */
-static inline UINT64 avg2_no_rnd(UINT64 l1, UINT64 l2)
+static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b) /* average of a and b rounding down, (a + b) >> 1; per byte lane assuming BYTE_VEC(x) replicates x into every byte -- TODO confirm */
{
- UINT64 correction = (l1 & l2) & BYTE_VEC(0x01);
- l1 = (l1 & ~BYTE_VEC(0x01)) >> 1;
- l2 = (l2 & ~BYTE_VEC(0x01)) >> 1;
- return l1 + l2 + correction;
+ return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1); /* a + b == 2 * (a & b) + (a ^ b), so no carry is ever formed; the 0xfe mask clears each byte's low bit before the shift */
}
-/* Average 8 bytes with rounding: (b1 + b2 + 1) >> 1
- The '1' only has an effect when one byte is even and the other odd,
- i. e. we also need to add (l1 ^ l2) & BYTE_VEC(0x01).
- Incidentally, that is equivalent to (l1 | l2) & BYTE_VEC(0x01). */
-static inline UINT64 avg2(UINT64 l1, UINT64 l2)
+static inline uint64_t avg2(uint64_t a, uint64_t b) /* average of a and b rounding up, (a + b + 1) >> 1; per byte lane assuming BYTE_VEC(x) replicates x into every byte -- TODO confirm */
{
- UINT64 correction = (l1 | l2) & BYTE_VEC(0x01);
- l1 = (l1 & ~BYTE_VEC(0x01)) >> 1;
- l2 = (l2 & ~BYTE_VEC(0x01)) >> 1;
- return l1 + l2 + correction;
+ return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1); /* a + b == 2 * (a | b) - (a ^ b); subtracting the halved difference from the OR rounds the half upwards */
}
-static inline UINT64 avg4(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4)
+static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) /* rounded average of four values, (l1 + l2 + l3 + l4 + 2) >> 2; per byte lane assuming BYTE_VEC(x) replicates x into every byte -- TODO confirm */
{
- UINT64 r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
- + ((l2 & ~BYTE_VEC(0x03)) >> 2)
- + ((l3 & ~BYTE_VEC(0x03)) >> 2)
- + ((l4 & ~BYTE_VEC(0x03)) >> 2);
- UINT64 r2 = (( (l1 & BYTE_VEC(0x03))
- + (l2 & BYTE_VEC(0x03))
- + (l3 & BYTE_VEC(0x03))
- + (l4 & BYTE_VEC(0x03))
- + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
+ uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
+ + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+ + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+ + ((l4 & ~BYTE_VEC(0x03)) >> 2); /* r1: the upper six bits of every input, shifted down first so the four-way sum stays inside its byte */
+ uint64_t r2 = (( (l1 & BYTE_VEC(0x03))
+ + (l2 & BYTE_VEC(0x03))
+ + (l3 & BYTE_VEC(0x03))
+ + (l4 & BYTE_VEC(0x03))
+ + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03); /* r2: the low two bits summed with the rounding bias 2, then divided by four; the final mask drops bits shifted in from the byte above */
return r1 + r2;
}
-static inline UINT64 avg4_no_rnd(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4)
+static inline uint64_t avg4_no_rnd(uint64_t l1, uint64_t l2, /* four-way average with bias 1 instead of 2, (l1 + l2 + l3 + l4 + 1) >> 2; per byte lane assuming BYTE_VEC(x) replicates x into every byte -- TODO confirm */
+ uint64_t l3, uint64_t l4)
{
- UINT64 r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
- + ((l2 & ~BYTE_VEC(0x03)) >> 2)
- + ((l3 & ~BYTE_VEC(0x03)) >> 2)
- + ((l4 & ~BYTE_VEC(0x03)) >> 2);
- UINT64 r2 = (( (l1 & BYTE_VEC(0x03))
- + (l2 & BYTE_VEC(0x03))
- + (l3 & BYTE_VEC(0x03))
- + (l4 & BYTE_VEC(0x03))
- + BYTE_VEC(0x01)) >> 2) & BYTE_VEC(0x03);
+ uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
+ + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+ + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+ + ((l4 & ~BYTE_VEC(0x03)) >> 2); /* r1: the upper six bits of every input, shifted down first so the four-way sum stays inside its byte */
+ uint64_t r2 = (( (l1 & BYTE_VEC(0x03))
+ + (l2 & BYTE_VEC(0x03))
+ + (l3 & BYTE_VEC(0x03))
+ + (l4 & BYTE_VEC(0x03))
+ + BYTE_VEC(0x01)) >> 2) & BYTE_VEC(0x03); /* r2: the low two bits summed with bias 1 (the only difference from avg4), then divided by four and masked back to two bits */
return r1 + r2;
}
-#define PIXOPNAME(suffix) put ## suffix
-#define BTYPE UINT8
-#define AVG2 avg2
-#define AVG4 avg4
-#define STORE(l, b) stq(l, b)
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
-#undef AVG2
-#undef AVG4
-#undef STORE
+#define OP(LOAD, STORE, INCR) /* straight copy loop: one LOAD from pixels and one STORE to block per row; uses pixels, block, line_size and h from the expanding function */ \
+ do { \
+ STORE(LOAD(pixels), block); \
+ pixels += line_size; \
+ block += INCR; \
+ } while (--h) /* the body runs once before h is tested */
-#define PIXOPNAME(suffix) put_no_rnd ## suffix
-#define BTYPE UINT8
-#define AVG2 avg2_no_rnd
-#define AVG4 avg4_no_rnd
-#define STORE(l, b) stq(l, b)
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
-#undef AVG2
-#undef AVG4
-#undef STORE
+#define OP_X2(LOAD, STORE, INCR) /* per row: AVG2 of the loaded quadword and a copy of it shifted by one byte */ \
+ do { \
+ uint64_t pix1, pix2; \
+ \
+ pix1 = LOAD(pixels); \
+ pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); /* drop the lowest byte and insert pixels[8] as the top byte; presumably the eight pixels starting at pixels + 1 given a little-endian load -- confirm */ \
+ STORE(AVG2(pix1, pix2), block); \
+ pixels += line_size; \
+ block += INCR; \
+ } while (--h)
-/* The following functions are untested. */
-#if 0
+#define OP_Y2(LOAD, STORE, INCR) /* per row: AVG2 of the quadword at pixels and the one line_size bytes after it */ \
+ do { \
+ uint64_t pix = LOAD(pixels); \
+ do { \
+ uint64_t next_pix; \
+ \
+ pixels += line_size; \
+ next_pix = LOAD(pixels); \
+ STORE(AVG2(pix, next_pix), block); \
+ block += INCR; \
+ pix = next_pix; /* reuse this load as the first operand of the next iteration */ \
+ } while (--h); /* h stores, but h + 1 loads in total */ \
+ } while (0)
+
+#define OP_XY2(LOAD, STORE, INCR) /* per row: AVG4 of two consecutive rows and their one-byte-shifted copies */ \
+ do { \
+ uint64_t pix1 = LOAD(pixels); \
+ uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); /* pix1 without its lowest byte, with pixels[8] inserted as the top byte */ \
+ \
+ do { \
+ uint64_t next_pix1, next_pix2; \
+ \
+ pixels += line_size; \
+ next_pix1 = LOAD(pixels); \
+ next_pix2 = next_pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
+ \
+ STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block); \
+ \
+ block += INCR; \
+ pix1 = next_pix1; /* carry this row's pair over so every row is loaded only once */ \
+ pix2 = next_pix2; \
+ } while (--h); \
+ } while (0)
-#define PIXOPNAME(suffix) avg ## suffix
-#define BTYPE UINT8
+#define MAKE_OP(BTYPE, OPNAME, SUFF, OPKIND, STORE, INCR) /* defines static void OPNAME_pixelsSUFF_axp(block, pixels, line_size, h) whose body is the OPKIND loop */ \
+static void OPNAME ## _pixels ## SUFF ## _axp(BTYPE *block, \
+ const uint8_t *pixels, \
+ int line_size, int h) \
+{ \
+ if ((size_t) pixels & 0x7) { /* pixels is not 8-byte aligned */ \
+ OPKIND(uldq, STORE, INCR); /* presumably uldq is the unaligned and ldq the aligned 64-bit load -- both defined outside this view, confirm */ \
+ } else { \
+ OPKIND(ldq, STORE, INCR); \
+ } \
+}
+
+#define PIXOP(BTYPE, OPNAME, STORE, INCR) /* emits the four OPNAME_pixels*_axp functions: plain, _x2, _y2 and _xy2 */ \
+ MAKE_OP(BTYPE, OPNAME, , OP, STORE, INCR); \
+ MAKE_OP(BTYPE, OPNAME, _x2, OP_X2, STORE, INCR); \
+ MAKE_OP(BTYPE, OPNAME, _y2, OP_Y2, STORE, INCR); \
+ MAKE_OP(BTYPE, OPNAME, _xy2, OP_XY2, STORE, INCR); /* NOTE(review): each ';' here follows a complete function definition, so it lands at file scope as an empty declaration */
+
+/* Rounding primitives: put and avg are generated with AVG2/AVG4 bound to avg2/avg4. */
#define AVG2 avg2
#define AVG4 avg4
+#define STORE(l, b) stq(l, b) /* put: hand the computed value straight to stq */
+PIXOP(uint8_t, put, STORE, line_size);
+
+#undef STORE
#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
+PIXOP(uint8_t, avg, STORE, line_size); /* avg: STORE first applies AVG2 to the new value and ldq(b), then passes the result to stq */
+
+/* Non-rounding primitives: the same generators with AVG2/AVG4 rebound to the _no_rnd helpers. */
#undef AVG2
#undef AVG4
#undef STORE
-
-#define PIXOPNAME(suffix) avg_no_rnd ## suffix
-#define BTYPE UINT8
#define AVG2 avg2_no_rnd
#define AVG4 avg4_no_rnd
-#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
-#undef AVG2
-#undef AVG4
-#undef STORE
+#define STORE(l, b) stq(l, b) /* put_no_rnd: hand the computed value straight to stq */
+PIXOP(uint8_t, put_no_rnd, STORE, line_size);
-#define PIXOPNAME(suffix) sub ## suffix
-#define BTYPE DCTELEM
-#define AVG2 avg2
-#define AVG4 avg4
-#define STORE(l, block) do { \
- UINT64 xxx = l; \
- (block)[0] -= (xxx >> 0) & 0xff; \
- (block)[1] -= (xxx >> 8) & 0xff; \
- (block)[2] -= (xxx >> 16) & 0xff; \
- (block)[3] -= (xxx >> 24) & 0xff; \
- (block)[4] -= (xxx >> 32) & 0xff; \
- (block)[5] -= (xxx >> 40) & 0xff; \
- (block)[6] -= (xxx >> 48) & 0xff; \
- (block)[7] -= (xxx >> 56) & 0xff; \
-} while (0)
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
-#undef AVG2
-#undef AVG4
#undef STORE
-
-#endif
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b); /* AVG2 is still avg2_no_rnd here, so the blend with ldq(b) uses the non-rounding average too; NOTE(review): the trailing ';' adds an empty statement at every use */
+PIXOP(uint8_t, avg_no_rnd, STORE, line_size);
void dsputil_init_alpha(void)
{
@@ -244,6 +249,16 @@ void dsputil_init_alpha(void)
put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_axp;
put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_axp;
+ avg_pixels_tab[0] = avg_pixels_axp;
+ avg_pixels_tab[1] = avg_pixels_x2_axp;
+ avg_pixels_tab[2] = avg_pixels_y2_axp;
+ avg_pixels_tab[3] = avg_pixels_xy2_axp;
+
+ avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_axp;
+ avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_axp;
+ avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_axp;
+ avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_axp;
+
/* amask clears all bits that correspond to present features. */
if (amask(AMASK_MVI) == 0) {
put_pixels_clamped = put_pixels_clamped_mvi_asm;