author     Ronald S. Bultje <rsbultje@gmail.com>    2013-03-10 16:53:07 -0700
committer  Martin Storsjö <martin@martin.st>        2013-04-19 23:29:22 +0300
commit     2957d29f0531ccd8a6f4378293424dfd92db3044 (patch)
tree       3b17973f39abf686ef973484ba3fc3d08c66dd36 /libavcodec
parent     78ce568e43a7f3993c33100aa8f5d56c4c4bd493 (diff)
download   ffmpeg-2957d29f0531ccd8a6f4378293424dfd92db3044.tar.gz
alpha: hpeldsp: Move half-pel assembly from dsputil to hpeldsp
Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec')
-rw-r--r--  libavcodec/alpha/Makefile                2
-rw-r--r--  libavcodec/alpha/dsputil_alpha.c       185
-rw-r--r--  libavcodec/alpha/dsputil_alpha.h         2
-rw-r--r--  libavcodec/alpha/dsputil_alpha_asm.S   103
-rw-r--r--  libavcodec/alpha/hpeldsp_alpha.c       213
-rw-r--r--  libavcodec/alpha/hpeldsp_alpha.h        28
-rw-r--r--  libavcodec/alpha/hpeldsp_alpha_asm.S   124
-rw-r--r--  libavcodec/alpha/regdef.h               11
-rw-r--r--  libavcodec/hpeldsp.c                     2
-rw-r--r--  libavcodec/hpeldsp.h                     1
10 files changed, 381 insertions, 290 deletions
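
(Note, not part of the patch.) The routines being moved implement half-pel motion compensation: the plain variants copy an 8- or 16-pixel-wide block, while the _x2/_y2/_xy2 variants average each pixel with its right, lower, or diagonal neighbour. For orientation, the scalar C below (function names invented for illustration) shows what the rounding "put" variants compute for an 8-pixel-wide block; the Alpha code in this patch produces the same results eight bytes at a time, the "avg" variants additionally average with the destination, and the no_rnd variants truncate (x2/y2) or use a rounder of 1 instead of 2 (xy2).

#include <stddef.h>
#include <stdint.h>

/* Illustrative scalar reference for the rounding half-pel "put" variants.
 * These names are invented for the example; they are not in the patch. */
void ref_put_pixels8(uint8_t *block, const uint8_t *pixels,
                     ptrdiff_t line_size, int h)
{
    for (; h > 0; h--, block += line_size, pixels += line_size)
        for (int i = 0; i < 8; i++)
            block[i] = pixels[i];
}

void ref_put_pixels8_x2(uint8_t *block, const uint8_t *pixels,
                        ptrdiff_t line_size, int h)
{
    for (; h > 0; h--, block += line_size, pixels += line_size)
        for (int i = 0; i < 8; i++)      /* average with right neighbour */
            block[i] = (pixels[i] + pixels[i + 1] + 1) >> 1;
}

void ref_put_pixels8_y2(uint8_t *block, const uint8_t *pixels,
                        ptrdiff_t line_size, int h)
{
    for (; h > 0; h--, block += line_size, pixels += line_size)
        for (int i = 0; i < 8; i++)      /* average with the row below */
            block[i] = (pixels[i] + pixels[i + line_size] + 1) >> 1;
}

void ref_put_pixels8_xy2(uint8_t *block, const uint8_t *pixels,
                         ptrdiff_t line_size, int h)
{
    for (; h > 0; h--, block += line_size, pixels += line_size)
        for (int i = 0; i < 8; i++)      /* 2x2 average, rounder 2 */
            block[i] = (pixels[i] + pixels[i + 1] +
                        pixels[i + line_size] + pixels[i + line_size + 1] + 2) >> 2;
}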
diff --git a/libavcodec/alpha/Makefile b/libavcodec/alpha/Makefile
index e28200d45a..6f22137167 100644
--- a/libavcodec/alpha/Makefile
+++ b/libavcodec/alpha/Makefile
@@ -4,4 +4,6 @@ OBJS += alpha/dsputil_alpha.o \
alpha/motion_est_mvi_asm.o \
alpha/simple_idct_alpha.o \
+OBJS-$(CONFIG_HPELDSP) += alpha/hpeldsp_alpha.o \
+ alpha/hpeldsp_alpha_asm.o
OBJS-$(CONFIG_MPEGVIDEO) += alpha/mpegvideo_alpha.o
diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index 34fe2ebcad..7a41cb8ebb 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -119,196 +119,11 @@ static void clear_blocks_axp(int16_t *blocks) {
} while (n);
}
-static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
-{
- return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
-}
-
-static inline uint64_t avg2(uint64_t a, uint64_t b)
-{
- return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
-}
-
-#if 0
-/* The XY2 routines basically utilize this scheme, but reuse parts in
- each iteration. */
-static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
-{
- uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
- + ((l2 & ~BYTE_VEC(0x03)) >> 2)
- + ((l3 & ~BYTE_VEC(0x03)) >> 2)
- + ((l4 & ~BYTE_VEC(0x03)) >> 2);
- uint64_t r2 = (( (l1 & BYTE_VEC(0x03))
- + (l2 & BYTE_VEC(0x03))
- + (l3 & BYTE_VEC(0x03))
- + (l4 & BYTE_VEC(0x03))
- + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
- return r1 + r2;
-}
-#endif
-
-#define OP(LOAD, STORE) \
- do { \
- STORE(LOAD(pixels), block); \
- pixels += line_size; \
- block += line_size; \
- } while (--h)
-
-#define OP_X2(LOAD, STORE) \
- do { \
- uint64_t pix1, pix2; \
- \
- pix1 = LOAD(pixels); \
- pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
- STORE(AVG2(pix1, pix2), block); \
- pixels += line_size; \
- block += line_size; \
- } while (--h)
-
-#define OP_Y2(LOAD, STORE) \
- do { \
- uint64_t pix = LOAD(pixels); \
- do { \
- uint64_t next_pix; \
- \
- pixels += line_size; \
- next_pix = LOAD(pixels); \
- STORE(AVG2(pix, next_pix), block); \
- block += line_size; \
- pix = next_pix; \
- } while (--h); \
- } while (0)
-
-#define OP_XY2(LOAD, STORE) \
- do { \
- uint64_t pix1 = LOAD(pixels); \
- uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
- uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \
- + (pix2 & BYTE_VEC(0x03)); \
- uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \
- + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \
- \
- do { \
- uint64_t npix1, npix2; \
- uint64_t npix_l, npix_h; \
- uint64_t avg; \
- \
- pixels += line_size; \
- npix1 = LOAD(pixels); \
- npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \
- npix_l = (npix1 & BYTE_VEC(0x03)) \
- + (npix2 & BYTE_VEC(0x03)); \
- npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \
- + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \
- avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
- + pix_h + npix_h; \
- STORE(avg, block); \
- \
- block += line_size; \
- pix_l = npix_l; \
- pix_h = npix_h; \
- } while (--h); \
- } while (0)
-
-#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \
-static void OPNAME ## _pixels ## SUFF ## _axp \
- (uint8_t *restrict block, const uint8_t *restrict pixels, \
- ptrdiff_t line_size, int h) \
-{ \
- if ((size_t) pixels & 0x7) { \
- OPKIND(uldq, STORE); \
- } else { \
- OPKIND(ldq, STORE); \
- } \
-} \
- \
-static void OPNAME ## _pixels16 ## SUFF ## _axp \
- (uint8_t *restrict block, const uint8_t *restrict pixels, \
- ptrdiff_t line_size, int h) \
-{ \
- OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \
- OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
-}
-
-#define PIXOP(OPNAME, STORE) \
- MAKE_OP(OPNAME, , OP, STORE) \
- MAKE_OP(OPNAME, _x2, OP_X2, STORE) \
- MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \
- MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
-
-/* Rounding primitives. */
-#define AVG2 avg2
-#define AVG4 avg4
-#define AVG4_ROUNDER BYTE_VEC(0x02)
-#define STORE(l, b) stq(l, b)
-PIXOP(put, STORE);
-
-#undef STORE
-#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-PIXOP(avg, STORE);
-
-/* Not rounding primitives. */
-#undef AVG2
-#undef AVG4
-#undef AVG4_ROUNDER
-#undef STORE
-#define AVG2 avg2_no_rnd
-#define AVG4 avg4_no_rnd
-#define AVG4_ROUNDER BYTE_VEC(0x01)
-#define STORE(l, b) stq(l, b)
-PIXOP(put_no_rnd, STORE);
-
-#undef STORE
-#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-PIXOP(avg_no_rnd, STORE);
-
-static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
- ptrdiff_t line_size, int h)
-{
- put_pixels_axp_asm(block, pixels, line_size, h);
- put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
-}
-
av_cold void ff_dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx)
{
const int high_bit_depth = avctx->bits_per_raw_sample > 8;
if (!high_bit_depth) {
- c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
- c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
- c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
- c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
-
- c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
- c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
- c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
- c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
-
- c->avg_pixels_tab[0][0] = avg_pixels16_axp;
- c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
- c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
- c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
-
- c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp;
- c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp;
- c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp;
- c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp;
-
- c->put_pixels_tab[1][0] = put_pixels_axp_asm;
- c->put_pixels_tab[1][1] = put_pixels_x2_axp;
- c->put_pixels_tab[1][2] = put_pixels_y2_axp;
- c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
-
- c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
- c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
- c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
- c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
-
- c->avg_pixels_tab[1][0] = avg_pixels_axp;
- c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
- c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
- c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
-
c->clear_blocks = clear_blocks_axp;
}
diff --git a/libavcodec/alpha/dsputil_alpha.h b/libavcodec/alpha/dsputil_alpha.h
index fcea47c665..d976c18e2e 100644
--- a/libavcodec/alpha/dsputil_alpha.h
+++ b/libavcodec/alpha/dsputil_alpha.h
@@ -26,8 +26,6 @@ void ff_simple_idct_axp(int16_t *block);
void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block);
void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block);
-void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
- ptrdiff_t line_size, int h);
void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
int line_size);
void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
diff --git a/libavcodec/alpha/dsputil_alpha_asm.S b/libavcodec/alpha/dsputil_alpha_asm.S
index 1f589aa0c0..afe02cc391 100644
--- a/libavcodec/alpha/dsputil_alpha_asm.S
+++ b/libavcodec/alpha/dsputil_alpha_asm.S
@@ -26,115 +26,12 @@
#include "regdef.h"
-/* Some nicer register names. */
-#define ta t10
-#define tb t11
-#define tc t12
-#define td AT
-/* Danger: these overlap with the argument list and the return value */
-#define te a5
-#define tf a4
-#define tg a3
-#define th v0
-
.set noat
.set noreorder
.arch pca56
.text
/************************************************************************
- * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
- * int line_size, int h)
- */
- .align 6
- .globl put_pixels_axp_asm
- .ent put_pixels_axp_asm
-put_pixels_axp_asm:
- .frame sp, 0, ra
- .prologue 0
-
- and a1, 7, t0
- beq t0, $aligned
-
- .align 4
-$unaligned:
- ldq_u t0, 0(a1)
- ldq_u t1, 8(a1)
- addq a1, a2, a1
- nop
-
- ldq_u t2, 0(a1)
- ldq_u t3, 8(a1)
- addq a1, a2, a1
- nop
-
- ldq_u t4, 0(a1)
- ldq_u t5, 8(a1)
- addq a1, a2, a1
- nop
-
- ldq_u t6, 0(a1)
- ldq_u t7, 8(a1)
- extql t0, a1, t0
- addq a1, a2, a1
-
- extqh t1, a1, t1
- addq a0, a2, t8
- extql t2, a1, t2
- addq t8, a2, t9
-
- extqh t3, a1, t3
- addq t9, a2, ta
- extql t4, a1, t4
- or t0, t1, t0
-
- extqh t5, a1, t5
- or t2, t3, t2
- extql t6, a1, t6
- or t4, t5, t4
-
- extqh t7, a1, t7
- or t6, t7, t6
- stq t0, 0(a0)
- stq t2, 0(t8)
-
- stq t4, 0(t9)
- subq a3, 4, a3
- stq t6, 0(ta)
- addq ta, a2, a0
-
- bne a3, $unaligned
- ret
-
- .align 4
-$aligned:
- ldq t0, 0(a1)
- addq a1, a2, a1
- ldq t1, 0(a1)
- addq a1, a2, a1
-
- ldq t2, 0(a1)
- addq a1, a2, a1
- ldq t3, 0(a1)
-
- addq a0, a2, t4
- addq a1, a2, a1
- addq t4, a2, t5
- subq a3, 4, a3
-
- stq t0, 0(a0)
- addq t5, a2, t6
- stq t1, 0(t4)
- addq t6, a2, a0
-
- stq t2, 0(t5)
- stq t3, 0(t6)
-
- bne a3, $aligned
- ret
- .end put_pixels_axp_asm
-
-/************************************************************************
* void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels,
* int line_size)
*/
diff --git a/libavcodec/alpha/hpeldsp_alpha.c b/libavcodec/alpha/hpeldsp_alpha.c
new file mode 100644
index 0000000000..144fa223f9
--- /dev/null
+++ b/libavcodec/alpha/hpeldsp_alpha.c
@@ -0,0 +1,213 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/attributes.h"
+#include "libavcodec/hpeldsp.h"
+#include "hpeldsp_alpha.h"
+#include "asm.h"
+
+static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
+{
+ return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
+}
+
+static inline uint64_t avg2(uint64_t a, uint64_t b)
+{
+ return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
+}
+
+#if 0
+/* The XY2 routines basically utilize this scheme, but reuse parts in
+ each iteration. */
+static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
+{
+ uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
+ + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+ + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+ + ((l4 & ~BYTE_VEC(0x03)) >> 2);
+ uint64_t r2 = (( (l1 & BYTE_VEC(0x03))
+ + (l2 & BYTE_VEC(0x03))
+ + (l3 & BYTE_VEC(0x03))
+ + (l4 & BYTE_VEC(0x03))
+ + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
+ return r1 + r2;
+}
+#endif
+
+#define OP(LOAD, STORE) \
+ do { \
+ STORE(LOAD(pixels), block); \
+ pixels += line_size; \
+ block += line_size; \
+ } while (--h)
+
+#define OP_X2(LOAD, STORE) \
+ do { \
+ uint64_t pix1, pix2; \
+ \
+ pix1 = LOAD(pixels); \
+ pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
+ STORE(AVG2(pix1, pix2), block); \
+ pixels += line_size; \
+ block += line_size; \
+ } while (--h)
+
+#define OP_Y2(LOAD, STORE) \
+ do { \
+ uint64_t pix = LOAD(pixels); \
+ do { \
+ uint64_t next_pix; \
+ \
+ pixels += line_size; \
+ next_pix = LOAD(pixels); \
+ STORE(AVG2(pix, next_pix), block); \
+ block += line_size; \
+ pix = next_pix; \
+ } while (--h); \
+ } while (0)
+
+#define OP_XY2(LOAD, STORE) \
+ do { \
+ uint64_t pix1 = LOAD(pixels); \
+ uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56); \
+ uint64_t pix_l = (pix1 & BYTE_VEC(0x03)) \
+ + (pix2 & BYTE_VEC(0x03)); \
+ uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2) \
+ + ((pix2 & ~BYTE_VEC(0x03)) >> 2); \
+ \
+ do { \
+ uint64_t npix1, npix2; \
+ uint64_t npix_l, npix_h; \
+ uint64_t avg; \
+ \
+ pixels += line_size; \
+ npix1 = LOAD(pixels); \
+ npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56); \
+ npix_l = (npix1 & BYTE_VEC(0x03)) \
+ + (npix2 & BYTE_VEC(0x03)); \
+ npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2) \
+ + ((npix2 & ~BYTE_VEC(0x03)) >> 2); \
+ avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
+ + pix_h + npix_h; \
+ STORE(avg, block); \
+ \
+ block += line_size; \
+ pix_l = npix_l; \
+ pix_h = npix_h; \
+ } while (--h); \
+ } while (0)
+
+#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE) \
+static void OPNAME ## _pixels ## SUFF ## _axp \
+ (uint8_t *restrict block, const uint8_t *restrict pixels, \
+ ptrdiff_t line_size, int h) \
+{ \
+ if ((size_t) pixels & 0x7) { \
+ OPKIND(uldq, STORE); \
+ } else { \
+ OPKIND(ldq, STORE); \
+ } \
+} \
+ \
+static void OPNAME ## _pixels16 ## SUFF ## _axp \
+ (uint8_t *restrict block, const uint8_t *restrict pixels, \
+ ptrdiff_t line_size, int h) \
+{ \
+ OPNAME ## _pixels ## SUFF ## _axp(block, pixels, line_size, h); \
+ OPNAME ## _pixels ## SUFF ## _axp(block + 8, pixels + 8, line_size, h); \
+}
+
+#define PIXOP(OPNAME, STORE) \
+ MAKE_OP(OPNAME, , OP, STORE) \
+ MAKE_OP(OPNAME, _x2, OP_X2, STORE) \
+ MAKE_OP(OPNAME, _y2, OP_Y2, STORE) \
+ MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
+
+/* Rounding primitives. */
+#define AVG2 avg2
+#define AVG4 avg4
+#define AVG4_ROUNDER BYTE_VEC(0x02)
+#define STORE(l, b) stq(l, b)
+PIXOP(put, STORE);
+
+#undef STORE
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
+PIXOP(avg, STORE);
+
+/* Not rounding primitives. */
+#undef AVG2
+#undef AVG4
+#undef AVG4_ROUNDER
+#undef STORE
+#define AVG2 avg2_no_rnd
+#define AVG4 avg4_no_rnd
+#define AVG4_ROUNDER BYTE_VEC(0x01)
+#define STORE(l, b) stq(l, b)
+PIXOP(put_no_rnd, STORE);
+
+#undef STORE
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
+PIXOP(avg_no_rnd, STORE);
+
+static void put_pixels16_axp_asm(uint8_t *block, const uint8_t *pixels,
+ ptrdiff_t line_size, int h)
+{
+ put_pixels_axp_asm(block, pixels, line_size, h);
+ put_pixels_axp_asm(block + 8, pixels + 8, line_size, h);
+}
+
+av_cold void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags)
+{
+ c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
+ c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
+ c->put_pixels_tab[0][2] = put_pixels16_y2_axp;
+ c->put_pixels_tab[0][3] = put_pixels16_xy2_axp;
+
+ c->put_no_rnd_pixels_tab[0][0] = put_pixels16_axp_asm;
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_axp;
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_axp;
+ c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_axp;
+
+ c->avg_pixels_tab[0][0] = avg_pixels16_axp;
+ c->avg_pixels_tab[0][1] = avg_pixels16_x2_axp;
+ c->avg_pixels_tab[0][2] = avg_pixels16_y2_axp;
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_axp;
+
+ c->avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels16_axp;
+ c->avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels16_x2_axp;
+ c->avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels16_y2_axp;
+ c->avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels16_xy2_axp;
+
+ c->put_pixels_tab[1][0] = put_pixels_axp_asm;
+ c->put_pixels_tab[1][1] = put_pixels_x2_axp;
+ c->put_pixels_tab[1][2] = put_pixels_y2_axp;
+ c->put_pixels_tab[1][3] = put_pixels_xy2_axp;
+
+ c->put_no_rnd_pixels_tab[1][0] = put_pixels_axp_asm;
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels_x2_axp;
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels_y2_axp;
+ c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels_xy2_axp;
+
+ c->avg_pixels_tab[1][0] = avg_pixels_axp;
+ c->avg_pixels_tab[1][1] = avg_pixels_x2_axp;
+ c->avg_pixels_tab[1][2] = avg_pixels_y2_axp;
+ c->avg_pixels_tab[1][3] = avg_pixels_xy2_axp;
+}
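
(Note, not part of the patch.) The avg2 helpers above average eight bytes packed in one 64-bit word without letting carries cross byte lanes, using a + b = 2*(a | b) - (a ^ b) = 2*(a & b) + (a ^ b); masking the XOR with 0xfe in every lane before the shift is what keeps the lanes independent. The standalone check below is illustrative only and defines its own BYTE_VEC (the 8-fold byte replication that asm.h appears to provide).

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BYTE_VEC(c) ((uint64_t)(c) * 0x0101010101010101ULL) /* replicate byte */

/* per-byte (a + b + 1) >> 1, no lane overflow */
static uint64_t avg2(uint64_t a, uint64_t b)
{
    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
}

/* per-byte (a + b) >> 1 */
static uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
{
    return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
}

int main(void)
{
    /* check one byte lane exhaustively; the mask keeps lanes independent */
    for (unsigned a = 0; a < 256; a++)
        for (unsigned b = 0; b < 256; b++) {
            assert(((avg2(BYTE_VEC(a), BYTE_VEC(b)) >> 8) & 0xff) ==
                   ((a + b + 1) >> 1));
            assert(((avg2_no_rnd(BYTE_VEC(a), BYTE_VEC(b)) >> 8) & 0xff) ==
                   ((a + b) >> 1));
        }
    puts("byte-wise averaging identities hold");
    return 0;
}

With that, avg2() is the per-byte rounding average and avg2_no_rnd() the truncating one, which is exactly what the OP_X2/OP_Y2 macros need.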
diff --git a/libavcodec/alpha/hpeldsp_alpha.h b/libavcodec/alpha/hpeldsp_alpha.h
new file mode 100644
index 0000000000..e44ff503f7
--- /dev/null
+++ b/libavcodec/alpha/hpeldsp_alpha.h
@@ -0,0 +1,28 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ALPHA_HPELDSP_ALPHA_H
+#define AVCODEC_ALPHA_HPELDSP_ALPHA_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
+ ptrdiff_t line_size, int h);
+
+#endif /* AVCODEC_ALPHA_HPELDSP_ALPHA_H */
diff --git a/libavcodec/alpha/hpeldsp_alpha_asm.S b/libavcodec/alpha/hpeldsp_alpha_asm.S
new file mode 100644
index 0000000000..b23d24f806
--- /dev/null
+++ b/libavcodec/alpha/hpeldsp_alpha_asm.S
@@ -0,0 +1,124 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * These functions are scheduled for pca56. They should work
+ * reasonably on ev6, though.
+ */
+
+#include "regdef.h"
+
+ .set noat
+ .set noreorder
+ .arch pca56
+ .text
+
+/************************************************************************
+ * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
+ * int line_size, int h)
+ */
+ .align 6
+ .globl put_pixels_axp_asm
+ .ent put_pixels_axp_asm
+put_pixels_axp_asm:
+ .frame sp, 0, ra
+ .prologue 0
+
+ and a1, 7, t0
+ beq t0, $aligned
+
+ .align 4
+$unaligned:
+ ldq_u t0, 0(a1)
+ ldq_u t1, 8(a1)
+ addq a1, a2, a1
+ nop
+
+ ldq_u t2, 0(a1)
+ ldq_u t3, 8(a1)
+ addq a1, a2, a1
+ nop
+
+ ldq_u t4, 0(a1)
+ ldq_u t5, 8(a1)
+ addq a1, a2, a1
+ nop
+
+ ldq_u t6, 0(a1)
+ ldq_u t7, 8(a1)
+ extql t0, a1, t0
+ addq a1, a2, a1
+
+ extqh t1, a1, t1
+ addq a0, a2, t8
+ extql t2, a1, t2
+ addq t8, a2, t9
+
+ extqh t3, a1, t3
+ addq t9, a2, ta
+ extql t4, a1, t4
+ or t0, t1, t0
+
+ extqh t5, a1, t5
+ or t2, t3, t2
+ extql t6, a1, t6
+ or t4, t5, t4
+
+ extqh t7, a1, t7
+ or t6, t7, t6
+ stq t0, 0(a0)
+ stq t2, 0(t8)
+
+ stq t4, 0(t9)
+ subq a3, 4, a3
+ stq t6, 0(ta)
+ addq ta, a2, a0
+
+ bne a3, $unaligned
+ ret
+
+ .align 4
+$aligned:
+ ldq t0, 0(a1)
+ addq a1, a2, a1
+ ldq t1, 0(a1)
+ addq a1, a2, a1
+
+ ldq t2, 0(a1)
+ addq a1, a2, a1
+ ldq t3, 0(a1)
+
+ addq a0, a2, t4
+ addq a1, a2, a1
+ addq t4, a2, t5
+ subq a3, 4, a3
+
+ stq t0, 0(a0)
+ addq t5, a2, t6
+ stq t1, 0(t4)
+ addq t6, a2, a0
+
+ stq t2, 0(t5)
+ stq t3, 0(t6)
+
+ bne a3, $aligned
+ ret
+ .end put_pixels_axp_asm
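
(Note, not part of the patch.) Alpha has no unaligned 64-bit load, so the $unaligned path fetches the two aligned quadwords covering each 8-byte row with ldq_u and splices them together with extql/extqh/or; the loop handles four rows per iteration and appears to rely on line_size keeping the source misalignment constant, so the same byte shift applies to every row. A portable sketch of the per-row operation (illustrative only; little-endian byte order, as on Alpha):

#include <stdint.h>
#include <string.h>

/* Sketch of the unaligned 8-byte load done per row in the $unaligned path. */
uint64_t unaligned_load64(const uint8_t *p)
{
    uintptr_t addr    = (uintptr_t)p;
    uintptr_t aligned = addr & ~(uintptr_t)7;    /* covering quadword start */
    unsigned  shift   = (unsigned)(addr & 7) * 8;/* misalignment, in bits   */
    uint64_t  lo, hi;

    memcpy(&lo, (const void *)aligned, 8);       /* like ldq_u 0(a1)        */
    if (shift == 0)                              /* the $aligned fast path  */
        return lo;

    memcpy(&hi, (const void *)(aligned + 8), 8); /* like ldq_u 8(a1)        */
    return (lo >> shift) | (hi << (64 - shift)); /* extql + extqh + or      */
}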
diff --git a/libavcodec/alpha/regdef.h b/libavcodec/alpha/regdef.h
index fa9ad98e5c..100594352d 100644
--- a/libavcodec/alpha/regdef.h
+++ b/libavcodec/alpha/regdef.h
@@ -63,4 +63,15 @@
#define sp $30 /* stack pointer */
#define zero $31 /* reads as zero, writes are noops */
+/* Some nicer register names. */
+#define ta t10
+#define tb t11
+#define tc t12
+#define td AT
+/* Danger: these overlap with the argument list and the return value */
+#define te a5
+#define tf a4
+#define tg a3
+#define th v0
+
#endif /* AVCODEC_ALPHA_REGDEF_H */
diff --git a/libavcodec/hpeldsp.c b/libavcodec/hpeldsp.c
index aed1cc982a..f00c449b12 100644
--- a/libavcodec/hpeldsp.c
+++ b/libavcodec/hpeldsp.c
@@ -54,6 +54,8 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
hpel_funcs(avg, [3], 2);
hpel_funcs(avg_no_rnd,, 16);
+ if (ARCH_ALPHA)
+ ff_hpeldsp_init_alpha(c, flags);
if (ARCH_ARM)
ff_hpeldsp_init_arm(c, flags);
if (ARCH_BFIN)
diff --git a/libavcodec/hpeldsp.h b/libavcodec/hpeldsp.h
index d454453ed7..68190ad8c3 100644
--- a/libavcodec/hpeldsp.h
+++ b/libavcodec/hpeldsp.h
@@ -94,6 +94,7 @@ typedef struct HpelDSPContext {
void ff_hpeldsp_init(HpelDSPContext *c, int flags);
+void ff_hpeldsp_init_alpha(HpelDSPContext *c, int flags);
void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags);
void ff_hpeldsp_init_bfin(HpelDSPContext *c, int flags);
void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags);
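
(Note, not part of the patch.) For orientation on the tables filled in by ff_hpeldsp_init_alpha(): judging from which functions land where, the first index selects the block width (0 for 16 pixels, 1 for 8) and the second the half-pel case (0 plain, 1 horizontal x2, 2 vertical y2, 3 diagonal xy2). A hypothetical caller, with invented names and assuming the function signature used throughout this patch, might look like:

#include <stddef.h>
#include <stdint.h>
#include "libavcodec/hpeldsp.h"

/* Hypothetical helper: copy one motion-compensated block.
 * dx, dy are the half-pel bits of the motion vector (0 or 1). */
void copy_hpel_block(HpelDSPContext *h, uint8_t *dst, const uint8_t *src,
                     ptrdiff_t stride, int use_8x8, int dx, int dy, int height)
{
    h->put_pixels_tab[use_8x8][dx + 2 * dy](dst, src, stride, height);
}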