diff options
author | Falk Hüffner <mellum@users.sourceforge.net> | 2002-07-03 01:09:44 +0000 |
---|---|---|
committer | Falk Hüffner <mellum@users.sourceforge.net> | 2002-07-03 01:09:44 +0000 |
commit | dde3f77dbc30a6564ca8ad373b7517f282983bdb (patch) | |
tree | c47e6c36560d2b907aa095ca33ea551b1e7fbb89 /libavcodec/alpha/dsputil_alpha.c | |
parent | 3530320dbf753d2896cd9a35353622268e28b53e (diff) | |
download | ffmpeg-dde3f77dbc30a6564ca8ad373b7517f282983bdb.tar.gz |
Use updated motion compensation routines.
Originally committed as revision 713 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/alpha/dsputil_alpha.c')
-rw-r--r-- | libavcodec/alpha/dsputil_alpha.c | 213 |
1 files changed, 114 insertions, 99 deletions
diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c index 942eef780c..db11d529fb 100644 --- a/libavcodec/alpha/dsputil_alpha.c +++ b/libavcodec/alpha/dsputil_alpha.c @@ -105,132 +105,137 @@ void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels, } #endif -/* Average 8 unsigned bytes in parallel: (b1 + b2) >> 1 - Since the immediate result could be greater than 255, we do the - shift first. The result is too low by one if the bytes were both - odd, so we need to add (l1 & l2) & BYTE_VEC(0x01). */ -static inline UINT64 avg2_no_rnd(UINT64 l1, UINT64 l2) +static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b) { - UINT64 correction = (l1 & l2) & BYTE_VEC(0x01); - l1 = (l1 & ~BYTE_VEC(0x01)) >> 1; - l2 = (l2 & ~BYTE_VEC(0x01)) >> 1; - return l1 + l2 + correction; + return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1); } -/* Average 8 bytes with rounding: (b1 + b2 + 1) >> 1 - The '1' only has an effect when one byte is even and the other odd, - i. e. we also need to add (l1 ^ l2) & BYTE_VEC(0x01). - Incidentally, that is equivalent to (l1 | l2) & BYTE_VEC(0x01). */ -static inline UINT64 avg2(UINT64 l1, UINT64 l2) +static inline uint64_t avg2(uint64_t a, uint64_t b) { - UINT64 correction = (l1 | l2) & BYTE_VEC(0x01); - l1 = (l1 & ~BYTE_VEC(0x01)) >> 1; - l2 = (l2 & ~BYTE_VEC(0x01)) >> 1; - return l1 + l2 + correction; + return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1); } -static inline UINT64 avg4(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4) +static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) { - UINT64 r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) - + ((l2 & ~BYTE_VEC(0x03)) >> 2) - + ((l3 & ~BYTE_VEC(0x03)) >> 2) - + ((l4 & ~BYTE_VEC(0x03)) >> 2); - UINT64 r2 = (( (l1 & BYTE_VEC(0x03)) - + (l2 & BYTE_VEC(0x03)) - + (l3 & BYTE_VEC(0x03)) - + (l4 & BYTE_VEC(0x03)) - + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03); + uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) + + ((l2 & ~BYTE_VEC(0x03)) >> 2) + + ((l3 & ~BYTE_VEC(0x03)) >> 2) + + ((l4 & ~BYTE_VEC(0x03)) >> 2); + uint64_t r2 = (( (l1 & BYTE_VEC(0x03)) + + (l2 & BYTE_VEC(0x03)) + + (l3 & BYTE_VEC(0x03)) + + (l4 & BYTE_VEC(0x03)) + + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03); return r1 + r2; } -static inline UINT64 avg4_no_rnd(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4) +static inline uint64_t avg4_no_rnd(uint64_t l1, uint64_t l2, + uint64_t l3, uint64_t l4) { - UINT64 r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) - + ((l2 & ~BYTE_VEC(0x03)) >> 2) - + ((l3 & ~BYTE_VEC(0x03)) >> 2) - + ((l4 & ~BYTE_VEC(0x03)) >> 2); - UINT64 r2 = (( (l1 & BYTE_VEC(0x03)) - + (l2 & BYTE_VEC(0x03)) - + (l3 & BYTE_VEC(0x03)) - + (l4 & BYTE_VEC(0x03)) - + BYTE_VEC(0x01)) >> 2) & BYTE_VEC(0x03); + uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) + + ((l2 & ~BYTE_VEC(0x03)) >> 2) + + ((l3 & ~BYTE_VEC(0x03)) >> 2) + + ((l4 & ~BYTE_VEC(0x03)) >> 2); + uint64_t r2 = (( (l1 & BYTE_VEC(0x03)) + + (l2 & BYTE_VEC(0x03)) + + (l3 & BYTE_VEC(0x03)) + + (l4 & BYTE_VEC(0x03)) + + BYTE_VEC(0x01)) >> 2) & BYTE_VEC(0x03); return r1 + r2; } -#define PIXOPNAME(suffix) put ## suffix -#define BTYPE UINT8 -#define AVG2 avg2 -#define AVG4 avg4 -#define STORE(l, b) stq(l, b) -#include "pixops.h" -#undef PIXOPNAME -#undef BTYPE -#undef AVG2 -#undef AVG4 -#undef STORE +#define OP(LOAD, STORE, INCR) \ + do { \ + STORE(LOAD(pixels), block); \ + pixels += line_size; \ + block += INCR; \ + } while (--h) -#define PIXOPNAME(suffix) put_no_rnd ## suffix -#define BTYPE UINT8 -#define AVG2 avg2_no_rnd -#define AVG4 avg4_no_rnd -#define STORE(l, b) stq(l, b) -#include "pixops.h" -#undef PIXOPNAME -#undef BTYPE -#undef AVG2 -#undef AVG4 -#undef STORE +#define OP_X2(LOAD, STORE, INCR) \ + do { \ + uint64_t pix1, pix2; \ + \ + pix1 = LOAD(pixels); \ + pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \ + STORE(AVG2(pix1, pix2), block); \ + pixels += line_size; \ + block += INCR; \ + } while (--h) -/* The following functions are untested. */ -#if 0 +#define OP_Y2(LOAD, STORE, INCR) \ + do { \ + uint64_t pix = LOAD(pixels); \ + do { \ + uint64_t next_pix; \ + \ + pixels += line_size; \ + next_pix = LOAD(pixels); \ + STORE(AVG2(pix, next_pix), block); \ + block += INCR; \ + pix = next_pix; \ + } while (--h); \ + } while (0) + +#define OP_XY2(LOAD, STORE, INCR) \ + do { \ + uint64_t pix1 = LOAD(pixels); \ + uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \ + \ + do { \ + uint64_t next_pix1, next_pix2; \ + \ + pixels += line_size; \ + next_pix1 = LOAD(pixels); \ + next_pix2 = next_pix1 >> 8 | ((uint64_t) pixels[8] << 56); \ + \ + STORE(AVG4(pix1, pix2, next_pix1, next_pix2), block); \ + \ + block += INCR; \ + pix1 = next_pix1; \ + pix2 = next_pix2; \ + } while (--h); \ + } while (0) -#define PIXOPNAME(suffix) avg ## suffix -#define BTYPE UINT8 +#define MAKE_OP(BTYPE, OPNAME, SUFF, OPKIND, STORE, INCR) \ +static void OPNAME ## _pixels ## SUFF ## _axp(BTYPE *block, \ + const uint8_t *pixels, \ + int line_size, int h) \ +{ \ + if ((size_t) pixels & 0x7) { \ + OPKIND(uldq, STORE, INCR); \ + } else { \ + OPKIND(ldq, STORE, INCR); \ + } \ +} + +#define PIXOP(BTYPE, OPNAME, STORE, INCR) \ + MAKE_OP(BTYPE, OPNAME, , OP, STORE, INCR); \ + MAKE_OP(BTYPE, OPNAME, _x2, OP_X2, STORE, INCR); \ + MAKE_OP(BTYPE, OPNAME, _y2, OP_Y2, STORE, INCR); \ + MAKE_OP(BTYPE, OPNAME, _xy2, OP_XY2, STORE, INCR); + +/* Rounding primitives. */ #define AVG2 avg2 #define AVG4 avg4 +#define STORE(l, b) stq(l, b) +PIXOP(uint8_t, put, STORE, line_size); + +#undef STORE #define STORE(l, b) stq(AVG2(l, ldq(b)), b); -#include "pixops.h" -#undef PIXOPNAME -#undef BTYPE +PIXOP(uint8_t, avg, STORE, line_size); + +/* Not rounding primitives. */ #undef AVG2 #undef AVG4 #undef STORE - -#define PIXOPNAME(suffix) avg_no_rnd ## suffix -#define BTYPE UINT8 #define AVG2 avg2_no_rnd #define AVG4 avg4_no_rnd -#define STORE(l, b) stq(AVG2(l, ldq(b)), b); -#include "pixops.h" -#undef PIXOPNAME -#undef BTYPE -#undef AVG2 -#undef AVG4 -#undef STORE +#define STORE(l, b) stq(l, b) +PIXOP(uint8_t, put_no_rnd, STORE, line_size); -#define PIXOPNAME(suffix) sub ## suffix -#define BTYPE DCTELEM -#define AVG2 avg2 -#define AVG4 avg4 -#define STORE(l, block) do { \ - UINT64 xxx = l; \ - (block)[0] -= (xxx >> 0) & 0xff; \ - (block)[1] -= (xxx >> 8) & 0xff; \ - (block)[2] -= (xxx >> 16) & 0xff; \ - (block)[3] -= (xxx >> 24) & 0xff; \ - (block)[4] -= (xxx >> 32) & 0xff; \ - (block)[5] -= (xxx >> 40) & 0xff; \ - (block)[6] -= (xxx >> 48) & 0xff; \ - (block)[7] -= (xxx >> 56) & 0xff; \ -} while (0) -#include "pixops.h" -#undef PIXOPNAME -#undef BTYPE -#undef AVG2 -#undef AVG4 #undef STORE - -#endif +#define STORE(l, b) stq(AVG2(l, ldq(b)), b); +PIXOP(uint8_t, avg_no_rnd, STORE, line_size); void dsputil_init_alpha(void) { @@ -244,6 +249,16 @@ void dsputil_init_alpha(void) put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_axp; put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_axp; + avg_pixels_tab[0] = avg_pixels_axp; + avg_pixels_tab[1] = avg_pixels_x2_axp; + avg_pixels_tab[2] = avg_pixels_y2_axp; + avg_pixels_tab[3] = avg_pixels_xy2_axp; + + avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_axp; + avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_axp; + avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_axp; + avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_axp; + /* amask clears all bits that correspond to present features. */ if (amask(AMASK_MVI) == 0) { put_pixels_clamped = put_pixels_clamped_mvi_asm; |